(*NotebookFileLineBreakTest
NotebookFileLineBreakTest*)
(*NotebookOptionsPosition[      9154,        281]*)
(*NotebookOutlinePosition[      9926,        308]*)
(*  CellTagsIndexPosition[      9882,        304]*)
(*WindowFrame->Normal*)

(* Notebook content: a Title cell, two cell groups ("Sample Covariance
   Matrix" and "Principal Components"), and a closing Text cell. *)
Notebook[{

Cell["Principal Component Analysis", "Title"],

(* ---- Section 1: sample mean, mean deviation form, sample covariance ---- *)
Cell[CellGroupData[{

Cell["Sample Covariance Matrix", "Section"],

Cell["\<\
Convert the matrix a to mean deviation form and calculate its \
sample covariance matrix. (See Lay 7.5, pp. 489-491)\
\>", "Text"],

(* NOTE(review): the AxesLabel strings in this file were damaged (they read
   "\" -- an unterminated string). The labels x1/x2 (original and
   mean-deviation data) and y1/y2 (transformed data) used below were chosen
   to match the variables x = {x1, x2} and y = {y1, y2} defined later in
   this notebook; confirm against the original notebook if available. *)
Cell[BoxData[{
    RowBox[{\(Clear[a];\), "\[IndentingNewLine]"}], 
    "\[IndentingNewLine]", 
    RowBox[{
      RowBox[{
        RowBox[{"a", "=", 
          RowBox[{"(", GridBox[{
                {"120", "125", "125", "135", "145"},
                {"61", "60", "64", "68", "72"}
                }], ")"}]}], ";"}], "\[IndentingNewLine]"}], 
    "\[IndentingNewLine]", 
    \(ListPlot[
      Transpose[a], \[IndentingNewLine]ImageSize \[Rule] 
        400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", 
          "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[0.02], 
          Red}];\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["sample mean",
    FontWeight->"Bold"],
  " of a matrix and use it to calculate the sample mean of a."
}], "Text"],

(* sampleMean: (1/n) times the sum of the columns of a, where n is the
   number of columns of a. *)
Cell[BoxData[
    \(sampleMean[
        a_?MatrixQ] := \[IndentingNewLine]Module[{n = \(Dimensions[
                a]\)[\([2]\)]}, \[IndentingNewLine]\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \(1\/n\) Apply[Plus, 
            Transpose[a]]]\)], "Input"],

Cell[BoxData[{
    \(\(m = sampleMean[a];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["mean deviation form",
    FontWeight->"Bold"],
  " of a matrix and use it to calculate the mean deviation form of a."
}], "Text"],

Cell[BoxData[
    \(<< LinearAlgebra`MatrixManipulation`\)], "Input"],

(* meanDeviationForm: subtracts sampleMean[a] from every column of a. *)
Cell[BoxData[
    \(\(meanDeviationForm[
          a_?MatrixQ] := \[IndentingNewLine]Module[{m = sampleMean[a], 
            n = \(Dimensions[
                  a]\)[\([2]\)]}, \[IndentingNewLine]\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Transpose[
            Table[\(Transpose[a]\)[\([k]\)] - 
                m, \[IndentingNewLine]\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {k, 1, 
                n}]]];\)\)], "Input"],

Cell[BoxData[{
    \(\(b = meanDeviationForm[a];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(ListPlot[
        Transpose[b], \[IndentingNewLine]ImageSize \[Rule] 
          400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", \ 
            "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[
              0.02], Red}];\)\)}], "Input"],

Cell["Check: b should have zero sample mean.", "Text"],

Cell[BoxData[{
    \(\(sampleMean[b];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["sample covariance matrix",
    FontWeight->"Bold"],
  " of a given matrix and use it to calculate the sample covariance matrix of \
a."
}], "Text"],

(* sampleCovarianceMatrix: (1/(n - 1)) a . Transpose[a]; it is applied below
   to b, the mean deviation form of the data. *)
Cell[BoxData[
    RowBox[{
      RowBox[{
        StyleBox["sampleCovarianceMatrix",
          FontWeight->"Bold"], "[", \(a_?MatrixQ\), "]"}], ":=", 
      "\[IndentingNewLine]", \(Module[{n = \(Dimensions[
                a]\)[\([2]\)]}, \[IndentingNewLine]\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \(1\/\(n - 1\)\) 
          a . Transpose[a]]\)}]], "Input"],

Cell[BoxData[{
    RowBox[{
      RowBox[{"s", "=", 
        RowBox[{
          StyleBox["sampleCovarianceMatrix",
            FontWeight->"Bold"], "[", "b", "]"}]}], ";"}], 
    "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"]
}, Open  ]],

(* ---- Section 2: diagonalize s and plot the principal components ---- *)
Cell[CellGroupData[{

Cell["Principal Components", "Section"],

Cell["\<\
Bring forward the sample covariance matrix from the previous \
example.\
\>", "Text"],

Cell[BoxData[
    \(s // MatrixForm\)], "Input"],

Cell["\<\
We seek to diagonalize the matrix s. 
Calculate the eigendata for s.\
\>", "Text"],

Cell[BoxData[
    \({evals, evecs} = Eigensystem[s]\)], "Input"],

Cell["\<\
Construct d, the diagonal matrix of eigenvalues in decreasing \
order.\
\>", "Text"],

Cell[BoxData[{
    \(\(d = DiagonalMatrix[evals] // N;\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["\<\
Construct the orthonormal matrix p, consisting of normalized \
eigenvectors of s.\
\>", "Text"],

(* The Table iterates over all eigenvectors returned by Eigensystem
   (Length[evecs]) rather than a hard-coded count of 2. *)
Cell[BoxData[{
    \(\(p = 
        Transpose[
          Table[evecs[\([k]\)]/
              Norm[evecs[\([k]\)] // N], \[IndentingNewLine]{k, 
              Length[evecs]}]];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["Check the diagonalization.", "Text"],

Cell[BoxData[
    \(s \[Equal] p . d . Transpose[p]\)], "Input"],

Cell["The columns of p are the principal components of the data.", "Text"],

Cell[BoxData[{
    \(<< Graphics`Graphics`\), "\[IndentingNewLine]", 
    \(<< Graphics`Arrow`\)}], "Input"],

(* showColorfulVectors: vecs is a list of {base point, direction} pairs;
   each pair is drawn as an Arrow from the base point to base + direction. *)
Cell[BoxData[
    \(showColorfulVectors[vecs_, color_, opts___] := 
      Show[Graphics[
          Flatten[{Thickness[ .004], color, 
              Table[Arrow[vecs[\([i, 1]\)], 
                  vecs[\([i, 1]\)] + vecs[\([i, 2]\)]], {i, 
                  Length[vecs]}]}]], \ opts, \ 
        AspectRatio \[Rule] Automatic, \ Axes \[Rule] True]\)], "Input"],

Cell[BoxData[{
    \({u1, u2} = Transpose[p]\), "\[IndentingNewLine]", 
    \(\(o = {0, 0};\)\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(DisplayTogether[\[IndentingNewLine]ListPlot[
          Transpose[b], \[IndentingNewLine]ImageSize \[Rule] 
            400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", \ 
              "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[
                0.02], Red}], \[IndentingNewLine]Show[
          showColorfulVectors[{{o, 10\ u1}, {o, 10 u2}}, 
            ManganeseBlue]]];\)\)}], "Input"],

Cell["\<\
Use the principal components to define and relate the variables X \
and Y. X represents the original data. Y represents the transformed \
data. The change of basis matrix p is orthonormal.\
\>", "Text"],

Cell[BoxData[{
    \(\(x = {x1, x2};\)\), "\[IndentingNewLine]", 
    \(\(\(y = {y1, y2};\)\(\[IndentingNewLine]\)
    \)\), "\[IndentingNewLine]", 
    \(\(y = Transpose[p] . 
          x;\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["\<\
The matrix d is the covariance matrix for the transformed data. \
y1 and y2 are uncorrelated.\
\>", "Text"],

Cell[BoxData[
    \(d // MatrixForm\)], "Input"],

Cell[BoxData[{
    \(\(v1 = {1, 0};\)\), "\[IndentingNewLine]", 
    \(\(v2 = {0, 1};\)\), "\[IndentingNewLine]", 
    \(\(o = {0, 0};\)\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(DisplayTogether[\[IndentingNewLine]ListPlot[
          Transpose[
            Transpose[p] . b], \[IndentingNewLine]ImageSize \[Rule] 
            400, \[IndentingNewLine]AxesLabel \[Rule] {"\<y1\>", 
              "\<y2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[
                0.02], Red}], \[IndentingNewLine]Show[
          showColorfulVectors[{{o, 10\ v1}, {o, 10 v2}}, 
            ManganeseBlue]]];\)\)}], "Input"],

Cell["\<\
Calculate the percentage of the total variance contained in the \
first principal component. Tr[d] is the trace of the matrix d.\
\>", "Text"],

Cell[BoxData[{
    \(\(\(firstPC = u1\)\(\[IndentingNewLine]\)
    \)\), "\[IndentingNewLine]", 
    \(percentVarianceFirstPC = d[\([1, 1]\)]/Tr[d]\)}], "Input"]
}, Open  ]],

Cell["\<\
The first principal component accounts for 98.4% of the variance in \
this data.\
\>", "Text"]
},
FrontEndVersion->"5.1 for Macintosh",
ScreenRectangle->{{4, 1280}, {0, 832}},
CellGrouping->Manual,
WindowSize->{783, 791},
WindowMargins->{{63, Automatic}, {Automatic, 0}},
PrintingCopies->1,
PrintingPageRange->{1, Automatic},
CellLabelAutoDelete->True,
StyleDefinitions -> "Report.nb"
]

(*******************************************************************
Cached data follows. If you edit this Notebook file directly, not
using Mathematica, you must remove the line containing CacheID at
the top of the file.