diff --git a/data/13.PCA/testSet.txt b/data/13.PCA/testSet.txt new file mode 100755 index 00000000..0e5d2a6c --- /dev/null +++ b/data/13.PCA/testSet.txt @@ -0,0 +1,1000 @@ +10.235186 11.321997 +10.122339 11.810993 +9.190236 8.904943 +9.306371 9.847394 +8.330131 8.340352 +10.152785 10.123532 +10.408540 10.821986 +9.003615 10.039206 +9.534872 10.096991 +9.498181 10.825446 +9.875271 9.233426 +10.362276 9.376892 +10.191204 11.250851 +7.720499 6.476300 +9.334379 8.471268 +7.963186 6.731333 +8.244973 9.013785 +9.569196 10.568949 +8.854793 9.076536 +9.382171 7.240862 +8.179055 8.944502 +8.267896 8.797017 +9.047165 8.725068 +8.741043 7.901385 +7.190216 7.804587 +8.081227 9.314431 +8.047460 5.720780 +7.917584 7.543254 +8.676942 10.102220 +9.210251 9.424717 +7.732998 9.840202 +7.681754 8.609897 +7.925863 10.079159 +8.261509 8.242080 +8.514872 7.527561 +10.324450 10.804481 +7.856710 7.931543 +7.858608 7.995340 +9.196808 6.577598 +9.644415 10.935081 +9.579833 9.085021 +7.888484 5.976428 +9.072624 9.703344 +8.914184 9.298515 +7.822869 7.086663 +10.538554 11.061464 +8.280187 8.709012 +8.884223 8.670105 +9.359927 10.575055 +9.078611 9.710833 +7.935134 8.586173 +8.805945 10.575145 +9.584316 9.614076 +11.269714 11.717254 +9.120444 9.019774 +7.977520 8.313923 +8.104648 9.456128 +8.617126 7.331723 +9.033811 9.469706 +8.327680 5.122092 +8.532272 10.100909 +9.295434 8.933824 +9.905202 9.027559 +10.585764 10.912733 +10.427584 11.532578 +9.072767 9.960144 +9.164275 8.645121 +9.746058 10.717080 +9.286072 9.340024 +8.188233 7.432415 +7.948598 8.445419 +7.563350 5.656178 +8.972405 8.801869 +9.980868 8.788996 +7.753490 7.714248 +7.431143 9.032819 +8.943403 8.359354 +10.481890 9.988969 +9.150454 10.278760 +8.123894 9.060351 +8.626164 8.469342 +7.354185 7.631252 +11.323046 11.015032 +8.190008 6.860792 +8.412598 7.661358 +9.258404 8.580382 +11.007915 11.443881 +8.279403 8.347003 +8.931149 10.105221 +10.239245 10.077473 +8.129346 7.096877 +8.485823 9.373561 +10.703640 11.651618 +9.500728 
8.150228 +9.712414 9.910445 +9.333374 9.407557 +8.787865 10.168021 +9.238180 10.253478 +9.577388 8.895150 +10.447753 10.318227 +9.303944 9.223136 +9.883268 11.662945 +9.471921 10.443792 +10.007753 9.579912 +8.110298 7.106263 +6.964069 6.585040 +10.413499 9.649309 +8.032629 7.053254 +8.015549 9.166753 +10.462924 8.656612 +9.530788 10.134130 +9.202658 9.314222 +10.103241 10.235159 +7.849264 6.624856 +9.059071 7.992555 +10.172889 10.724789 +9.528439 6.420990 +7.190422 6.789792 +9.085716 9.846328 +9.452887 8.735386 +7.417322 7.348594 +8.468639 8.715086 +8.303642 9.463231 +9.939052 10.026771 +8.701989 7.516978 +9.737541 10.587281 +8.280233 7.852444 +10.648386 10.259203 +9.173893 10.520372 +9.135397 10.751406 +7.594580 8.488833 +8.587520 8.463406 +8.581887 7.888644 +9.448768 8.707422 +7.882664 7.772030 +10.050635 9.859720 +9.012078 9.533899 +8.770020 8.882996 +9.428804 9.446306 +8.504209 8.319693 +9.800003 10.964667 +8.069660 7.683099 +10.012217 10.320644 +8.704677 8.918146 +8.198722 7.297786 +9.868322 9.901657 +9.426997 11.480353 +9.228767 9.262976 +8.952359 9.528471 +8.186847 8.600587 +9.026371 8.705143 +9.483364 9.807079 +7.826587 7.975401 +11.197846 10.959298 +7.632421 8.769745 +8.761605 8.309365 +9.353670 8.728758 +6.466637 6.038996 +8.370634 9.178830 +10.337451 11.075600 +8.917679 8.288367 +9.076621 8.487626 +7.278948 4.634097 +10.153017 11.219183 +7.132603 5.853118 +9.338644 9.805940 +9.878602 9.187000 +10.009505 10.924505 +9.384438 10.691860 +7.535322 8.160481 +6.808732 8.268469 +8.302965 8.075009 +8.345379 8.305356 +9.517530 8.249839 +9.267825 9.999109 +10.291511 11.032664 +8.605909 8.705207 +8.331145 7.812295 +8.632412 10.574287 +8.766397 8.712107 +9.407070 9.732756 +9.709495 9.729569 +10.422201 11.070360 +6.831495 6.466763 +8.187122 8.405929 +8.523093 9.041844 +7.952394 6.801220 +10.490780 10.001468 +10.813791 9.802494 +7.861113 7.541475 +8.800399 8.738974 +7.542152 6.612838 +9.446981 9.378659 +8.281684 7.358572 +8.473801 8.208343 +11.736767 11.022029 
+8.379578 8.714348 +8.313718 8.832381 +9.342589 10.416659 +7.560710 6.889648 +9.295344 9.739040 +9.176612 9.718781 +8.614385 10.150521 +9.079373 8.839794 +10.333289 10.921255 +9.453502 7.335134 +10.174590 10.292500 +9.693713 9.793636 +7.474925 7.751391 +10.107905 10.156997 +9.257241 7.854266 +10.209794 11.410157 +7.248050 6.433676 +10.150091 9.288597 +10.077713 10.321500 +8.191122 8.931519 +8.791469 10.287216 +9.229434 9.095193 +8.682571 8.546005 +7.524099 7.709751 +8.442410 8.326037 +9.364851 9.095989 +9.061222 7.557899 +7.989999 8.555363 +8.801275 8.868732 +10.351932 9.497796 +10.230710 10.496151 +9.783163 9.891408 +10.651481 9.431617 +8.387393 6.400507 +9.003921 7.050003 +8.483723 8.314886 +9.020501 7.545771 +9.329105 11.095661 +9.583687 9.271929 +8.908705 8.407529 +8.835406 8.083517 +9.736362 8.296735 +10.030302 9.737178 +8.287142 6.993460 +9.173211 9.306335 +9.026355 9.696531 +9.128391 9.921247 +11.486346 12.910777 +11.519458 11.472111 +9.027707 10.263974 +9.351935 8.542200 +9.421701 11.403201 +9.005687 8.100969 +7.015279 6.614278 +8.213607 8.340948 +8.226646 8.718997 +8.144753 8.366877 +10.133642 12.790169 +10.763481 10.847016 +10.003622 10.337716 +9.007955 9.792482 +8.670506 10.782931 +10.386414 9.956162 +10.104761 10.123044 +8.079502 8.304075 +9.945424 11.855409 +8.642497 9.998066 +9.349722 8.690328 +9.034991 8.826490 +8.738746 7.518464 +8.919532 9.740312 +9.464136 10.444588 +10.710057 12.666857 +10.042007 10.532091 +8.447996 7.426363 +9.509351 9.030516 +11.946359 10.553075 +9.981617 9.912651 +9.853876 9.632967 +10.560648 11.881714 +8.370952 9.989491 +8.323209 10.102529 +9.828359 11.702462 +8.515623 8.426754 +9.004363 9.628036 +10.529847 10.458031 +10.028765 10.624880 +9.448114 9.313227 +8.332617 7.382295 +8.323006 8.276608 +7.740771 8.799750 +8.379615 8.146192 +8.340764 9.184458 +9.863614 8.254694 +9.969563 9.405134 +9.164394 9.182127 +10.622098 9.722592 +9.592072 10.029446 +8.212027 7.477366 +9.080225 8.244448 +8.555774 7.842325 +9.958046 9.696221 
+8.972573 9.797128 +9.213223 7.128437 +8.737239 9.385138 +10.333907 10.994856 +8.797511 8.643075 +11.044848 9.623160 +8.539260 9.097113 +11.582163 11.884333 +7.863848 7.176199 +6.218103 5.283562 +9.120602 7.250190 +9.001166 9.635203 +8.081476 8.844224 +9.369802 8.230911 +8.768925 8.666987 +9.841098 8.543896 +10.451522 9.549511 +9.755402 9.117522 +7.988961 6.869854 +8.872507 9.787118 +10.363980 10.716608 +6.315671 5.765953 +9.638879 9.202355 +8.588126 8.037966 +8.947408 9.144386 +9.051130 7.195132 +9.321709 8.380668 +10.146531 9.754745 +9.843373 8.891437 +9.213148 11.700632 +7.630078 7.294753 +8.093088 7.967590 +7.488915 6.090652 +8.126036 8.586472 +8.760350 7.268987 +10.201347 9.141013 +7.838208 7.307700 +6.155653 5.563997 +7.767841 6.254528 +8.425656 8.615832 +10.362168 10.886815 +10.180024 10.378934 +9.794665 10.047812 +9.970394 9.668279 +7.030217 7.060471 +9.275414 9.095738 +10.314911 10.456539 +9.259774 8.204851 +10.023919 9.558307 +8.887540 9.866704 +9.851608 9.410989 +8.710882 7.268012 +9.017007 10.217673 +7.976369 9.000979 +8.738332 8.664734 +8.344510 8.977600 +8.959613 12.324240 +9.169982 8.624635 +7.487451 8.154859 +8.706316 7.719455 +9.564832 8.940403 +8.327775 9.044509 +9.734032 10.195255 +8.021343 6.445092 +9.081048 11.024397 +7.626651 6.549263 +10.725858 8.575374 +8.731381 8.307788 +10.394237 10.596874 +7.029311 7.658832 +9.517907 7.509904 +10.394064 10.060898 +10.752500 9.431601 +9.692431 10.332130 +9.651897 7.876862 +8.592329 10.096837 +10.212801 10.827496 +9.045043 9.265524 +8.901643 8.036115 +10.794525 9.318830 +11.040915 12.021746 +8.390836 9.672469 +9.840166 11.226568 +10.806810 12.205633 +8.924285 10.934056 +8.411251 8.289672 +7.808891 9.663290 +9.733437 8.486958 +8.300026 7.477374 +8.221756 10.278308 +9.096867 9.619677 +9.410116 9.289188 +10.097176 9.768470 +9.387954 8.844855 +9.376134 7.704630 +8.231599 9.101203 +9.910738 10.694855 +8.645689 7.764589 +8.090245 7.109596 +9.253483 9.813672 +9.331546 8.039386 +9.843256 10.208792 +9.713131 
9.247665 +9.259369 10.704622 +10.243948 9.695883 +6.396262 6.456390 +8.936289 8.703871 +8.750846 9.347273 +6.497155 4.130251 +9.516552 10.164848 +9.125766 8.858775 +8.374387 7.300114 +8.132816 7.621107 +10.099505 9.159134 +9.356477 6.869999 +8.112934 7.587547 +7.265396 6.987031 +11.950505 13.715109 +10.745959 10.822171 +8.893270 7.887332 +6.003473 4.960219 +7.498851 6.451334 +10.162072 9.935954 +8.732617 9.177679 +9.300827 9.952360 +11.908436 12.256801 +9.371215 9.188645 +9.943640 9.245037 +7.386450 7.046819 +8.410374 8.293218 +7.830419 6.440253 +8.263140 8.279446 +11.448164 12.192363 +8.216533 9.186628 +9.316128 10.046697 +8.156927 6.834792 +9.951421 11.240598 +9.059607 8.458446 +10.476339 10.560461 +7.548200 7.227127 +9.432204 7.236705 +9.402750 9.126413 +11.188095 13.853426 +9.520201 11.028131 +8.884154 9.764071 +8.961105 8.833117 +8.549663 8.865765 +10.111708 10.515462 +9.024761 9.169368 +7.904149 8.048756 +9.240995 7.796142 +8.126538 6.116125 +7.442148 7.931335 +9.486821 10.091359 +9.834289 11.694720 +9.009714 11.599170 +9.761314 11.344083 +6.993941 6.562988 +8.659524 8.410107 +7.685363 8.097297 +7.793217 6.519109 +8.883454 9.257347 +8.781821 9.231980 +7.946281 7.658978 +8.523959 10.646480 +9.031525 8.649648 +8.317140 7.758978 +9.192417 11.151218 +8.408486 8.282182 +10.327702 11.459048 +8.389687 8.548727 +8.642250 7.056870 +8.833447 9.267638 +8.805261 8.320281 +9.726211 9.095997 +8.477631 9.507530 +9.738838 9.652110 +8.272108 7.582696 +9.258089 8.495931 +8.334144 8.810766 +8.150904 6.486032 +7.259669 7.270156 +11.034180 11.519954 +10.705432 10.642527 +8.388814 7.159137 +8.559369 7.846284 +7.187988 6.519313 +8.811453 7.765900 +8.492762 7.992941 +8.739752 8.502909 +10.150752 10.420295 +7.062378 5.365289 +8.448195 7.480000 +10.224333 11.592750 +9.533795 9.212845 +9.519492 7.690501 +9.661847 10.376189 +7.963877 8.597193 +10.184486 9.136709 +8.505234 9.159210 +8.187646 8.518690 +9.167590 9.405917 +8.612162 8.518755 +10.970868 10.392229 +9.603649 9.141095 +9.704263 
8.830178 +9.657506 8.132449 +9.337882 11.045306 +9.521722 9.537764 +8.954197 8.728179 +8.635658 10.352662 +8.910816 9.020317 +9.900933 9.392002 +10.247105 8.289649 +9.571690 8.171237 +7.388627 7.668071 +8.354008 10.074590 +9.775598 8.835696 +8.768913 7.983604 +8.330199 8.474098 +8.169356 9.361172 +10.346522 10.086434 +7.976144 9.266702 +8.429648 7.865824 +11.261674 11.788587 +10.051066 10.112425 +8.954626 9.789343 +8.382220 8.121012 +9.820642 9.426441 +8.125950 9.695087 +8.646465 7.291808 +8.190202 8.003737 +8.773887 7.306175 +8.731000 10.300436 +9.163098 7.816769 +9.456346 9.223922 +9.645180 9.324053 +8.835060 8.966915 +9.325950 10.943248 +9.941912 9.548535 +9.282799 10.119488 +9.567591 9.462164 +8.529019 9.768001 +9.314824 10.153727 +8.264439 8.273860 +8.307262 8.214036 +9.122041 8.657861 +8.404258 8.389365 +7.828355 8.419433 +9.803180 10.108286 +8.662439 8.581953 +8.883265 8.978377 +8.012330 8.262451 +9.420258 8.974878 +7.015415 6.365940 +9.888832 11.163036 +9.677549 10.346431 +8.410158 7.912899 +9.464147 10.762900 +7.067227 7.035717 +9.320923 10.583089 +9.056917 8.771241 +8.110004 8.387789 +10.310021 10.970014 +8.211185 8.809627 +8.942883 8.840746 +9.479958 8.328700 +8.973982 8.702291 +8.519257 8.764855 +9.424556 8.956911 +7.222919 8.177787 +8.257007 9.700619 +9.778795 9.296134 +8.028806 8.575974 +9.886464 9.965076 +9.090552 6.978930 +9.605548 10.256751 +9.959004 9.610229 +8.308701 9.509124 +7.748293 9.685933 +8.311108 9.428114 +9.697068 10.217956 +9.582991 9.478773 +9.167265 10.198412 +10.329753 10.406602 +8.908819 7.428789 +10.072908 10.393294 +7.992905 9.226629 +8.907696 7.269366 +8.421948 9.342968 +7.481399 7.225033 +10.358408 10.166130 +8.786556 10.279943 +9.658701 11.379367 +10.167807 9.417552 +8.653449 8.656681 +8.020304 8.671270 +8.364348 10.004068 +9.119183 9.788199 +8.405504 9.740580 +11.020930 11.904350 +9.755232 9.515713 +10.059542 9.589748 +8.727131 9.777998 +7.666182 6.028642 +8.870733 8.367501 +9.340446 7.707269 +9.919283 10.796813 +7.905837 
8.326034 +10.181187 10.089865 +8.797328 8.981988 +8.466272 7.765032 +10.335914 12.620539 +9.365003 8.609115 +8.011017 7.249489 +10.923993 13.901513 +7.074631 7.558720 +9.824598 8.851297 +8.861026 8.370857 +10.127296 10.861535 +10.548377 10.855695 +8.880470 7.948761 +8.901619 9.674705 +7.813710 9.246912 +10.128808 10.560668 +11.096699 10.911644 +8.551471 6.871514 +8.907241 8.677815 +10.571647 10.294838 +8.815314 8.810725 +8.453396 8.339296 +9.594819 11.487580 +10.714211 9.628908 +7.428788 7.712869 +10.892119 12.747752 +9.024071 11.112692 +7.803375 7.847038 +8.521558 8.881848 +9.742818 11.520203 +9.832836 9.180396 +8.703132 10.028498 +9.905029 11.347606 +10.037536 8.882688 +8.629995 8.392863 +9.583497 9.219663 +8.781687 9.650598 +9.344119 9.537024 +10.407510 9.223929 +7.244488 6.559021 +10.643616 10.288383 +8.757557 6.947901 +10.784590 11.233350 +10.028427 11.330033 +7.968361 6.830308 +8.925954 8.539113 +7.738692 7.114987 +8.192398 8.352016 +10.412017 12.431122 +8.208801 5.777678 +7.820077 7.790720 +9.542754 11.542541 +6.817938 7.429229 +7.365218 7.956797 +9.274391 7.932700 +9.546475 8.803412 +7.471734 6.797870 +8.016969 7.848070 +8.852701 8.458114 +8.215012 8.468330 +6.975507 6.846980 +9.435134 10.609700 +9.228075 9.342622 +8.388410 7.637856 +7.111456 9.289163 +9.403508 8.482654 +9.133894 8.343575 +10.670801 9.750821 +9.983542 10.074537 +10.012865 8.537017 +8.929895 8.951909 +7.666951 7.473615 +9.493839 7.821783 +8.894081 7.059413 +9.593382 9.859732 +9.126847 8.395700 +9.532945 9.850696 +9.459384 9.384213 +8.982743 8.217062 +10.107798 8.790772 +10.563574 9.044890 +8.278963 9.518790 +8.734960 10.494129 +9.597940 9.530895 +10.025478 9.508270 +10.335922 10.974063 +8.404390 8.146748 +7.108699 6.038469 +8.873951 7.474227 +8.731459 8.154455 +8.795146 7.534687 +6.407165 6.810352 +9.979312 10.287430 +8.786715 8.396736 +10.753339 10.360567 +10.508031 10.321976 +10.636925 10.193797 +10.614322 11.215420 +8.916411 8.965286 +8.112756 8.304769 +10.833109 10.497542 +8.319758 
9.727691 +9.945336 11.820097 +10.150461 9.914715 +10.185024 10.388722 +9.793569 9.079955 +10.590128 11.811596 +8.505584 6.884282 +10.461428 10.745439 +8.755781 9.418427 +7.488249 7.172072 +10.238905 10.428659 +9.887827 10.427821 +8.529971 8.838217 +8.375208 10.242837 +8.901724 8.398304 +8.607694 9.173198 +8.691369 9.964261 +9.584578 9.641546 +10.265792 11.405078 +7.592968 6.683355 +8.692791 9.389031 +7.589852 6.005793 +10.550386 11.736584 +8.578351 7.227055 +7.526931 6.875134 +8.577081 9.877115 +9.272136 11.050928 +10.300809 10.653059 +8.642013 9.006681 +9.720491 10.265202 +9.029005 9.646928 +8.736201 7.975603 +8.672886 9.070759 +8.370633 8.412170 +9.483776 9.183341 +6.790842 7.594992 +9.842146 10.156810 +9.563336 7.962532 +8.724669 9.870732 +9.012145 9.171326 +9.116948 9.791167 +6.219094 7.988420 +9.468422 8.359975 +8.825231 8.475208 +9.572224 9.696428 +9.609128 8.488175 +9.428590 10.468998 +8.293266 8.617701 +9.423584 10.355688 +9.240796 9.517228 +10.915423 13.026252 +10.854684 11.130866 +9.226816 9.391796 +9.580264 10.359235 +7.289907 6.898208 +9.338857 10.374025 +9.523176 11.332190 +10.162233 10.357396 +8.873930 9.207398 +8.607259 7.794804 +8.852325 8.215797 +8.077272 6.501042 +8.169273 8.269613 +6.806421 7.544423 +8.793151 9.691549 +11.640981 11.365702 +9.544082 11.576545 +9.009266 9.605596 +9.726552 9.426719 +9.495888 10.626624 +8.683982 9.337864 +8.322105 8.631099 +8.887895 8.644931 +8.662659 11.373025 +9.263321 7.536016 +7.802624 7.171625 +8.773183 8.561565 +8.730443 10.197596 +8.942915 7.758383 +8.057618 8.774996 +8.112081 8.202349 +10.378884 12.103755 +9.248876 8.637249 +9.739599 9.708576 +8.126345 8.278487 +8.894788 7.966117 +9.683165 9.019221 +10.886957 12.053843 +9.668852 10.902132 +7.486692 6.471138 +8.794850 9.173609 +8.835915 8.296727 +9.443984 11.375344 +8.696621 6.434580 +9.645560 9.233722 +9.623857 7.915590 +10.840632 12.620268 +7.298135 7.356141 +9.639644 8.902389 +9.849802 7.682624 +10.609964 10.259615 +9.768229 11.382811 +7.646351 7.571849 
+10.230300 9.470859 +8.224402 8.496866 +6.879671 8.393648 +7.976247 8.667221 +9.183268 8.694550 +11.471853 12.786280 +10.428349 10.615726 +8.090828 5.902504 +9.738627 8.485792 +8.139709 8.396333 +9.508055 8.990529 +8.857260 8.497732 +8.902558 7.014433 +9.660607 11.040833 +8.772221 10.512150 +11.020038 9.354134 +7.918527 7.742062 +7.630835 7.756260 +11.043272 11.041613 +9.299376 8.674157 +9.795087 8.431837 +9.415683 8.312101 +7.942037 6.942913 +9.724790 11.766496 +10.222032 11.550876 +8.894163 8.306020 +8.394309 8.070420 +9.012776 6.880548 +9.661093 10.138921 +9.896472 9.762372 +9.135628 8.759928 +8.762656 10.306028 +8.602473 8.861956 +10.085297 10.464774 +10.644983 10.945767 +9.034571 8.391668 +8.602920 8.501944 +8.224766 7.402758 +8.755050 9.431085 +9.669937 8.641049 +10.693530 10.287124 +9.462806 7.611153 +9.287707 10.082363 +10.941260 10.783728 +9.263080 7.913328 +10.167111 10.225338 +8.783830 9.465345 +8.958624 8.662136 +9.841649 9.926781 +7.205691 6.790638 +8.629089 9.135461 +7.469440 8.450442 +8.179133 7.790434 +8.083984 7.875520 +9.271300 8.135359 +8.652349 8.254397 +7.983920 6.609684 +7.836860 9.785238 +7.418535 7.011256 +8.458288 10.095364 +9.387605 9.726911 +8.663951 8.206705 +10.146507 11.698577 +8.937103 10.990924 +11.218687 11.141945 +8.363142 9.106936 +7.877643 7.122922 +9.620978 9.905689 +9.509649 10.773209 +6.748743 6.705385 +9.300919 8.085029 +9.332257 9.818791 +7.898610 8.366643 +9.841914 9.480675 +6.920484 8.959501 +8.544713 9.563136 +8.162266 6.715277 +8.659552 9.282008 +10.673398 13.174824 +9.024000 10.379238 +8.183292 6.647572 +10.544919 10.649602 +7.201266 6.529605 +9.557407 11.096821 +8.304605 6.940929 +9.742855 9.920897 +10.024587 9.645222 +10.002296 9.998940 +8.965876 8.665419 +7.823136 6.949572 +8.125088 7.654065 +6.569589 6.046863 +10.195497 8.689129 +11.730011 10.374221 +8.739105 7.457571 +9.820059 10.278526 +9.547456 10.398198 +8.375072 8.416302 +8.889533 8.308929 +8.861201 9.290408 +12.677687 12.788463 +9.100735 8.620537 +7.728350 
6.328219 +7.955373 8.355028 +8.733352 8.645414 +10.257527 11.191813 +9.246413 9.497014 +9.745302 9.642035 +7.785652 8.147621 +7.431673 8.566399 +8.654384 8.466701 +8.475392 6.744677 +9.968440 10.765192 +10.163616 10.806963 +10.238135 10.036636 +9.902889 10.746730 +9.523850 8.749708 +9.214363 9.149178 +9.266040 10.841502 +8.494292 7.770942 +10.821158 10.410192 +8.645888 7.970308 +9.885204 10.098080 +9.084990 10.886349 +9.277874 8.871449 +8.135131 7.137064 +7.917379 9.080522 +9.685586 8.822850 +8.558141 7.848112 +9.502917 10.061255 +6.409004 5.164774 +10.149235 10.579951 +7.847304 8.411351 +8.846930 6.819939 +8.675153 9.411147 +9.476276 9.061508 +11.099184 10.644263 +8.792411 10.379405 +8.400418 7.072706 +8.555713 7.923805 +8.024763 8.426993 +8.642696 10.453412 +7.906117 7.920408 +8.793393 9.722878 +8.280364 7.669854 +9.387766 9.706245 +9.626853 10.762499 +10.163631 10.919007 +9.375543 11.513524 +9.309440 8.575699 +10.055329 10.297255 +8.706241 9.097172 +10.032934 11.951897 +10.812974 11.311435 +10.352603 10.819865 +8.276870 9.055403 +8.397389 7.944434 +9.371741 10.395790 +10.825710 10.144099 +9.158483 11.385382 +10.658639 11.389856 +8.091762 6.631039 +10.734892 10.054598 +11.535880 11.604912 +9.799077 11.371677 +8.478725 9.078455 +9.399902 8.947744 +7.305377 8.144973 +7.613377 6.668798 +10.681308 10.830845 +9.973855 10.004133 +9.369918 7.855433 +8.838223 7.429033 +9.521831 10.623930 +9.724419 10.447452 +8.890224 9.275923 +9.932763 11.589953 +10.839337 9.051250 +8.497708 7.521701 +8.440236 8.705670 +9.063566 9.755744 +8.449647 8.929485 +8.554576 8.063231 +10.348606 10.550718 +5.985254 5.186844 +9.931937 10.175582 +9.854922 9.201393 +9.114580 9.134215 +10.334899 8.543604 diff --git a/docs/1.机器学习基础.md b/docs/1.机器学习基础.md index c2d66dba..699c66e1 100644 --- a/docs/1.机器学习基础.md +++ b/docs/1.机器学习基础.md @@ -18,7 +18,7 @@ * 在分类算法中目标变量的类型通常是标称型的,而在回归算法中通常是连续型的。 * 机器学习的训练过程 - * ![机器学习训练过程图](./1.机器学习基础训练过程.png) + * ![机器学习训练过程图](/images/1.MLFoundation/机器学习基础训练过程.png) * 监督学习 * 
必须知道预测什么,即必须知道目标变量的分类信息。分类和回归属于监督学习。 @@ -43,7 +43,7 @@ * 想要完成何种任务,比如是预测明天下雨的概率还是对投票者按照兴趣分组;如果想要预测目标变量的值,则可以选择监督学习算法,否则可以选择无监督学习算法。 * 需要分析或收集的数据是什么 * 举例 -* ![选择算法图](./1.机器学习基础-选择算法.png) +* ![选择算法图](/images/1.MLFoundation/机器学习基础-选择算法.png) * 开发的步骤 * 1.收集数据 diff --git a/docs/11.使用Apriori算法进行关联分析.md b/docs/11.使用Apriori算法进行关联分析.md index 2dc4a838..04503a60 100644 --- a/docs/11.使用Apriori算法进行关联分析.md +++ b/docs/11.使用Apriori算法进行关联分析.md @@ -14,7 +14,7 @@ * 2.关联规则(association rules): 暗示两种物品之间可能存在很强的关系 * 总结:首先需要找到频繁项集,才能找到关联规则。 * 如下图: - * ![交易清单](./11.交易清单.png) + * ![交易清单](/images/11.Apriori/交易清单.png) * 支持度(support) * 数据集中包含该项集的记录所占的比例 * 例如上图中:{豆奶}的支持度=4/5, {豆奶,尿布}的支持度=3/5 @@ -36,10 +36,10 @@ * 如果某个项集是频繁的,那么它的所有子集也是频繁的,反之,一个项集是非频繁的,那么它的所有超集也是非频繁的。 * 例如: 我们假设知道{2, 3}是非频繁项,那么{0, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}都是非频繁项。 * 如下图: - * ![非频繁项集](./11.非频繁项集.png) + * ![非频繁项集](/images/11.Apriori/非频繁项集.png) * 分级法: 频繁项集->关联规则 * 1.首先从一个频繁项集开始,接着创建一个规则列表,其中规则右部分只包含一个元素,然后对这个规则进行测试。 * 2.接下来合并所有剩余规则来创建一个新的规则列表,其中规则右部包含两个元素。 * 如下图: - * ![所有可能的项集组合](./11.所有可能的项集组合.png) + * ![所有可能的项集组合](/images/11.Apriori/所有可能的项集组合.png) * 最后: 每次增加频繁项集的大小,Apriori算法都会重新扫描整个数据集,是否有优化空间呢? 
下一章:FP-growth算法等着你的到来 diff --git a/docs/13.利用PCA来简化数据.md b/docs/13.利用PCA来简化数据.md index 143e2ac4..cfa3c031 100644 --- a/docs/13.利用PCA来简化数据.md +++ b/docs/13.利用PCA来简化数据.md @@ -1,6 +1,70 @@ - # 3) 利用PCA来简化数据 - -* 未完待续 +> 场景描述: + +* 我们正通过电视而非现场观看体育比赛,在电视的纯平显示器上有一个球。 +* 显示器大概包含了100万像素,而球则可能是由较少的像素组成,例如说一千个像素。 +* 人们实时地将显示器上的百万像素转换成为一个三维图像,该图像就给出运动场上球的位置。 +* 在这个过程中,人们已经将数据从一百万维降至了三维。这就被称为`降维(dimensionality reduction)` + +## 1 降维技术 + +> 数据显示并非大规模特征下的唯一难题,对数据进行简化还有如下一系列的原因: + +* 1) 使得数据集更容易使用 +* 2) 降低很多算法的计算开销 +* 3) 去除噪音 +* 4) 使得结果易懂 + +> 适用范围: + +* 在已标注与未标注的数据上都有降维技术。 +* 这里我们将主要关注未标注数据上的降维技术,该技术同样也可以应用于已标注的数据。 + +> 在以下3种降维技术中, PCA的应用目前最为广泛,因此本章主要关注PCA。 + +* 1) 主成分分析(Principal Component Analysis, PCA) + * `通俗理解:就是找出一个最主要的特征,然后进行分析。` + * `例如: 考察一个人的智力情况,就直接看数学成绩就行(存在:数学、语文、英语成绩)` + * a.将数据从原来的坐标系转换到了新的坐标系,新的坐标系的选择是由数据本身决定的。 + * b.第一个新坐标轴选择的是原始数据中`方差最大`的方向 + * c.第二个新坐标轴的选择和第一个坐标轴`正交(orthogonal 如果是二维空间就叫垂直)`且具有`最大方差`的方向。 + * d.该过程一直重复,重复次数为原始数据中特征的数目。 + * 我们会发现,大部分方差都包含在最前面的几个新坐标轴中。因此,我们可以忽略余下的坐标轴,即对数据进行了降维处理。 +* 2) 因子分析(Factor Analysis) + * `通俗理解:将多个实测变量转换为少数几个综合指标,它反映一种降维的思想.通过降维将相关性高的变量聚在一起,从而减少需要分析的变量的数量,而减少问题分析的复杂性` + * `例如: 考察一个人的整体情况,就直接组合3样成绩(隐变量),看平均成绩就行(存在:数学、语文、英语成绩)` + * 应用的领域:社会科学、金融和其他领域 + * 在因子分析中,我们 + * 假设观察数据的生成中有一些观察不到的隐变量(latent variable)。 + * 假设观察数据是这些隐变量和某些噪音的线性组合。 + * 那么隐变量的数目可能比观察数据的数目少,也就是说通过找到隐变量就可以实现数据的降维。 +* 3) 独立成分分析(Independent Component Analysis, ICA) + * `通俗理解:PCA(主成分分析)寻找的是,使得投影之后,尽量保留原有信息量的投影方向。 ICA(独立成分分析)寻找的是,使得投影之后,数据之间相互独立的投影方向。` + * `例如:我们去ktv唱歌,想辨别唱的是哪首歌,PCA就是搜录歌词;而ICA是对歌词按人进行完全的拆分。` + * ICA假设数据是从N个数据源生成的,这一点和因子分析有些类似。 + * 假设数据为多个数据源的混合观察结果,这些数据源之间在统计上是相互独立的,而在PCA中只假设数据是不相关的。 + * 同因子分析一样,如果数据源的数目少于观察数据的数目,则可以实现降维过程。 + +## 2 主成分分析(PCA) + +> PCA的优缺点 + +* 优点:降低数据的复杂性,识别最重要的多个特征。 +* 缺点:不一定需要,且可能损失有用信息。 +* 适用数据类型:数值型数据。 + +> 通过PCA进行降维处理,我们就可以同时获得SVM和决策树的优点: + +* 一方面,得到了和决策树一样简单的分类器,同时分类间隔和SVM一样好。 + * 1.第一个主成分就是从数据差异性最大(即方差最大)的方向提取出来的 + * 2.第二个主成分则来自于数据差异性次大的方向,并且该方向与第一个主成分方向正交。 + * 3.通过数据集的协方差矩阵及其特征值分析,我们就可以得到这些主成分的值。 + * 
一旦得到了协方差矩阵的特征向量,我们就可以保留最大的N个值。这些特征向量也给出了N个最重要特征的真实结构。我们可以通过将数据乘上这N个特征向量而将它转换到新的空间。 +* 例如下图: +* ![应用PCA降维](/images/13.PCA/应用PCA降维.png) + +## 3 对半导体数据进行降维处理 + + \ No newline at end of file diff --git a/docs/3.决策树.md b/docs/3.决策树.md index 3d6e6ed7..7f0bd058 100644 --- a/docs/3.决策树.md +++ b/docs/3.决策树.md @@ -24,4 +24,4 @@ * 基尼不纯度(Gini impurity) [本书不做过多的介绍] * 简单来说:就是从一个数据集中随机选取子项,度量其被错误分类到其他分组里的概率。 * 流程介绍图 -* ![决策树流程介绍图](./3.决策树流程介绍图.jpg) +* ![决策树流程介绍图](/images/3.DecisionTree/决策树流程介绍图.jpg) diff --git a/docs/7.利用AdaBoost元算法提高分类.md b/docs/7.利用AdaBoost元算法提高分类.md index e9cdc3a7..3c6d6621 100644 --- a/docs/7.利用AdaBoost元算法提高分类.md +++ b/docs/7.利用AdaBoost元算法提高分类.md @@ -32,18 +32,18 @@ * 单层决策树(decision stump, 也称决策树桩)是一种简单的决策树。 * 过拟合(overfitting, 也称为过学习) * 发现测试错误率在达到一个最小值之后有开始上升,这种现象称为过拟合。 - * ![过拟合](./7.过拟合.png) + * ![过拟合](/images/7.AdaBoost/过拟合.png) * 非均衡分类问题 * 现象: * 判断马是否能继续生存 * 过滤垃圾邮件 * ROC曲线: 最佳的分类器应该尽可能地处于左上角 - * ![ROC曲线](./7.ROC曲线.png) + * ![ROC曲线](/images/7.AdaBoost/ROC曲线.png) * 对不同的ROC曲线进行比较的一个指标是曲线下的面积(Area Unser the Curve, AUC). 
* AUC给出的是分类器的平均性能值,当然它并不能完全代替对整条曲线的观察。 * 一个完美分类器的AUC为1,而随机猜测的AUC则为0.5。 * 基于代价函数的分类器决策控制:`TP*(-5)+FN*1+FP*50+TN*0` - * ![代价函数](./7.代价函数.png) + * ![代价函数](/images/7.AdaBoost/代价函数.png) * 欠抽样(undersampling)或者过抽样(oversampling) * 欠抽样: 意味着删除样例 * 过抽样: 意味着复制样例(重复使用) diff --git a/docs/1.机器学习基础-选择算法.png b/images/1.MLFoundation/机器学习基础-选择算法.png similarity index 100% rename from docs/1.机器学习基础-选择算法.png rename to images/1.MLFoundation/机器学习基础-选择算法.png diff --git a/docs/1.机器学习基础训练过程.png b/images/1.MLFoundation/机器学习基础训练过程.png similarity index 100% rename from docs/1.机器学习基础训练过程.png rename to images/1.MLFoundation/机器学习基础训练过程.png diff --git a/docs/11.交易清单.png b/images/11.Apriori/交易清单.png similarity index 100% rename from docs/11.交易清单.png rename to images/11.Apriori/交易清单.png diff --git a/docs/11.所有可能的项集组合.png b/images/11.Apriori/所有可能的项集组合.png similarity index 100% rename from docs/11.所有可能的项集组合.png rename to images/11.Apriori/所有可能的项集组合.png diff --git a/docs/11.非频繁项集.png b/images/11.Apriori/非频繁项集.png similarity index 100% rename from docs/11.非频繁项集.png rename to images/11.Apriori/非频繁项集.png diff --git a/images/13.PCA/应用PCA降维.png b/images/13.PCA/应用PCA降维.png new file mode 100644 index 00000000..d9807d02 Binary files /dev/null and b/images/13.PCA/应用PCA降维.png differ diff --git a/docs/3.决策树流程介绍图.jpg b/images/3.DecisionTree/决策树流程介绍图.jpg similarity index 100% rename from docs/3.决策树流程介绍图.jpg rename to images/3.DecisionTree/决策树流程介绍图.jpg diff --git a/docs/7.ROC曲线.png b/images/7.AdaBoost/ROC曲线.png similarity index 100% rename from docs/7.ROC曲线.png rename to images/7.AdaBoost/ROC曲线.png diff --git a/docs/7.代价函数.png b/images/7.AdaBoost/代价函数.png similarity index 100% rename from docs/7.代价函数.png rename to images/7.AdaBoost/代价函数.png diff --git a/docs/7.过拟合.png b/images/7.AdaBoost/过拟合.png similarity index 100% rename from docs/7.过拟合.png rename to images/7.AdaBoost/过拟合.png diff --git a/src/python/13.PCA/pca.py b/src/python/13.PCA/pca.py new file mode 100644 index 00000000..86ee7958 --- /dev/null +++ 
#!/usr/bin/python
# coding:utf8

'''
Created on Jun 1, 2011
Update on 2017-04-06
@author: Peter Harrington/片刻
'''
print(__doc__)
from numpy import *


def loadDataSet(fileName, delim='\t'):
    '''Load a delimited text file of numbers into a numpy matrix.

    Args:
        fileName: path of the text file; one sample per line.
        delim: separator between the values on a line (default: tab).

    Returns:
        A numpy matrix with one row per non-blank line of the file.

    Raises:
        ValueError: if a field cannot be converted with float().
    '''
    datArr = []
    # 'with' closes the handle even when a value fails to parse.
    with open(fileName) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A blank/trailing line would otherwise reach float('').
                continue
            # Build a real list: on Python 3 map() is lazy and would leave
            # iterator objects inside the matrix instead of numbers.
            datArr.append([float(value) for value in line.split(delim)])
    # asmatrix is the spelling that exists on both NumPy 1.x and 2.x.
    return asmatrix(datArr)


def pca(dataMat, topNfeat=9999999):
    '''Project the data onto its topNfeat largest-variance eigenvectors.

    Args:
        dataMat: samples in rows, features in columns. Anything asmatrix()
            accepts (matrix, ndarray, nested list).
        topNfeat: number of eigenvectors to keep; the huge default keeps
            every one of them.

    Returns:
        (lowDDataMat, reconMat): the mean-centred data expressed in the kept
        eigenvector basis, and that projection mapped back into the original
        feature space with the column means added again.
    '''
    # Force matrix semantics so '*' below is a matrix product, not an
    # element-wise product, whatever array-like the caller passed in.
    dataMat = asmatrix(dataMat)
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals  # centre every column on zero
    covMat = cov(meanRemoved, rowvar=False)  # columns are the variables
    eigVals, eigVects = linalg.eig(asmatrix(covMat))
    eigValInd = argsort(eigVals)  # indices ordered smallest -> largest
    # Walk backwards from the end: the topNfeat largest, largest first.
    eigValInd = eigValInd[:-(topNfeat + 1):-1]
    redEigVects = eigVects[:, eigValInd]  # kept eigenvectors as columns
    lowDDataMat = meanRemoved * redEigVects  # coordinates in the new basis
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat


def replaceNanWithMean(fileName='secom.data', delim=' '):
    '''Load a data file and replace each NaN with the mean of its column.

    Args:
        fileName: path of the file to load (default: 'secom.data').
        delim: separator between the values on a line (default: one space).

    Returns:
        The loaded matrix with every NaN entry overwritten by the mean of
        the non-NaN entries of the same column.
    '''
    datMat = loadDataSet(fileName, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # .A turns the column into a plain ndarray so isnan/nonzero return
        # ordinary row indices.
        nanMask = isnan(datMat[:, i].A)
        # Mean over the rows of this column that hold a real number.
        meanVal = mean(datMat[nonzero(~nanMask)[0], i])
        # Overwrite the NaN rows of this column with that mean.
        datMat[nonzero(nanMask)[0], i] = meanVal
    return datMat


if __name__ == "__main__":
    dataMat = loadDataSet('data/13.PCA/testSet.txt')
    lowDmat, reconMat = pca(dataMat, 1)
    # Function-call form prints the same tuple on Python 2 and Python 3.
    print(shape(lowDmat))