(************** Content-type: application/mathematica **************
                     CreatedBy='Mathematica 5.1'

                    Mathematica-Compatible Notebook

This notebook can be used with any Mathematica-compatible
application, such as Mathematica, MathReader or Publicon. The data
for the notebook starts with the line containing stars above.

To get the notebook into a Mathematica-compatible application, do
one of the following:

* Save the data starting with the line of stars above into a file
  with a name ending in .nb, then open the file inside the
  application;

* Copy the data starting with the line of stars above to the
  clipboard, then use the Paste menu command inside the application.

Data for notebooks contains only printable 7-bit ASCII and can be
sent directly in email or through ftp in text mode.  Newlines can be
CR, LF or CRLF (Unix, Macintosh or MS-DOS style).

NOTE: If you modify the data for this notebook not in a Mathematica-
compatible application, you must delete the line below containing
the word CacheID, otherwise Mathematica-compatible applications may
try to use invalid cache data.

For more information on notebooks and Mathematica-compatible
applications, contact Wolfram Research:
  web: http://www.wolfram.com
  email: info@wolfram.com
  phone: +1-217-398-0700 (U.S.)

Notebook reader applications are available free of charge from
Wolfram Research.
*******************************************************************)

(* Principal Component Analysis worksheet.

   Contents, in evaluation order:
     sampleMean[a]              (1/n) times the sum of the columns of a
     meanDeviationForm[a]       a with sampleMean[a] subtracted from each column
     sampleCovarianceMatrix[a]  (1/(n-1)) a . Transpose[a]; it does not centre
                                its argument, so pass a matrix that is already
                                in mean deviation form
     showColorfulVectors[vecs, color, opts]
                                draws one Arrow per {base, direction} pair
   followed by the eigen-decomposition of the sample covariance matrix s
   and plots of the data in the original and principal-component axes.

   This file was edited by hand.  As the header above requires, the
   CacheID line, the cached position hints and the cached cell outline
   at the end of the file have been removed; a Mathematica-compatible
   application recreates them on the next save.

   NOTE(review): the AxesLabel strings were unreadable in the previous
   revision (the label text was missing and the quotes were unbalanced).
   They are restored as x1, x2 for the original data and y1, y2 for the
   transformed data, following the variable names this notebook itself
   uses.  Confirm against the author's original labels. *)

(*NotebookFileLineBreakTest
NotebookFileLineBreakTest*)
(*WindowFrame->Normal*)



Notebook[{
Cell["Principal Component Analysis", "Title"],

Cell[CellGroupData[{

Cell["Sample Covariance Matrix", "Section"],

Cell["\<\
Convert the matrix a to mean deviation form and calculate its \
sample covariance matrix. 
(See Lay 7.5, pp. 489-491)\
\>", "Text"],

Cell[BoxData[{
    RowBox[{\(Clear[a];\), "\[IndentingNewLine]"}], "\[IndentingNewLine]", 
    RowBox[{
      RowBox[{
        RowBox[{"a", "=", 
          RowBox[{"(", GridBox[{
                {"120", "125", "125", "135", "145"},
                {"61", "60", "64", "68", "72"}
                }], ")"}]}], ";"}], "\[IndentingNewLine]"}], "\[IndentingNewLine]", 
    \(ListPlot[Transpose[a], \[IndentingNewLine]ImageSize \[Rule] 400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[0.02], Red}];\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["sample mean",
    FontWeight->"Bold"],
  " of a matrix and use it to calculate the sample mean of a."
}], "Text"],

Cell[BoxData[
    \(sampleMean[a_?MatrixQ] := \[IndentingNewLine]Module[{n = \(Dimensions[a]\)[\([2]\)]}, \[IndentingNewLine]\(1\/n\) Apply[Plus, Transpose[a]]]\)], "Input"],

Cell[BoxData[{
    \(\(m = sampleMean[a];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["mean deviation form",
    FontWeight->"Bold"],
  " of a matrix and use it to calculate the mean deviation form of a."
}], "Text"],

Cell[BoxData[
    \(<< LinearAlgebra`MatrixManipulation`\)], "Input"],

Cell[BoxData[
    \(\(meanDeviationForm[a_?MatrixQ] := \[IndentingNewLine]Module[{m = sampleMean[a], n = \(Dimensions[a]\)[\([2]\)]}, \[IndentingNewLine]Transpose[Table[\(Transpose[a]\)[\([k]\)] - m, \[IndentingNewLine]{k, 1, n}]]];\)\)], "Input"],

Cell[BoxData[{
    \(\(b = meanDeviationForm[a];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(ListPlot[Transpose[b], \[IndentingNewLine]ImageSize \[Rule] 400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[0.02], Red}];\)\)}], "Input"],

Cell["Check: b should have zero sample mean.", "Text"],

Cell[BoxData[{
    \(\(sampleMean[b];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell[TextData[{
  "Write a procedure to calculate the ",
  StyleBox["sample covariance matrix",
    FontWeight->"Bold"],
  " of a given matrix and use it to calculate the sample covariance matrix of \
a."
}], "Text"],

Cell[BoxData[
    RowBox[{
      RowBox[{
        StyleBox["sampleCovarianceMatrix",
          FontWeight->"Bold"], "[", \(a_?MatrixQ\), "]"}], ":=", "\[IndentingNewLine]", 
      \(Module[{n = \(Dimensions[a]\)[\([2]\)]}, \[IndentingNewLine]\(1\/\(n - 1\)\) a . Transpose[a]]\)}]], "Input"],

Cell[BoxData[{
    RowBox[{
      RowBox[{"s", "=", 
        RowBox[{
          StyleBox["sampleCovarianceMatrix",
            FontWeight->"Bold"], "[", "b", "]"}]}], ";"}], "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"]
}, Open  ]],

Cell[CellGroupData[{

Cell["Principal Components", "Section"],

Cell["\<\
Bring forward the sample covariance matrix from the previous \
example.\
\>", "Text"],

Cell[BoxData[
    \(s // MatrixForm\)], "Input"],

Cell["\<\
We seek to diagonalize the matrix s. 
Calculate the eigendata for s. N[s] is used because the eigenvalues of an \
approximate numerical matrix are returned in order of decreasing absolute \
value, which the steps below rely on.\
\>", "Text"],

Cell[BoxData[
    \({evals, evecs} = Eigensystem[N[s]]\)], "Input"],

Cell["\<\
Construct d, the diagonal matrix of eigenvalues in decreasing \
order.\
\>", "Text"],

Cell[BoxData[{
    \(\(d = DiagonalMatrix[evals] // N;\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["\<\
Construct the orthogonal matrix p, whose columns are the normalized \
eigenvectors of s.\
\>", "Text"],

Cell[BoxData[{
    \(\(p = Transpose[Table[evecs[\([k]\)]/Norm[evecs[\([k]\)] // N], \[IndentingNewLine]{k, Length[evecs]}]];\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["Check the diagonalization.", "Text"],

Cell[BoxData[
    \(s \[Equal] p . d . Transpose[p]\)], "Input"],

Cell["The columns of p are the principal components of the data.", "Text"],

Cell[BoxData[{
    \(<< Graphics`Graphics`\), "\[IndentingNewLine]", 
    \(<< Graphics`Arrow`\)}], "Input"],

Cell[BoxData[
    \(showColorfulVectors[vecs_, color_, opts___] := Show[Graphics[Flatten[{Thickness[ .004], color, Table[Arrow[vecs[\([i, 1]\)], vecs[\([i, 1]\)] + vecs[\([i, 2]\)]], {i, Length[vecs]}]}]], opts, AspectRatio \[Rule] Automatic, Axes \[Rule] True]\)], "Input"],

Cell[BoxData[{
    \({u1, u2} = Transpose[p]\), "\[IndentingNewLine]", 
    \(\(o = {0, 0};\)\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(DisplayTogether[\[IndentingNewLine]ListPlot[Transpose[b], \[IndentingNewLine]ImageSize \[Rule] 400, \[IndentingNewLine]AxesLabel \[Rule] {"\<x1\>", "\<x2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[0.02], Red}], \[IndentingNewLine]Show[showColorfulVectors[{{o, 10\ u1}, {o, 10\ u2}}, ManganeseBlue]]];\)\)}], "Input"],

Cell["\<\
Use the principal components to define and relate the variables X \
and Y. X represents the original data. Y represents the transformed data. The \
change of basis matrix p is orthogonal (its columns are orthonormal).\
\>", "Text"],

Cell[BoxData[{
    \(\(x = {x1, x2};\)\), "\[IndentingNewLine]", 
    \(\(\(y = {y1, y2};\)\(\[IndentingNewLine]\)\)\), "\[IndentingNewLine]", 
    \(\(y = Transpose[p] . x;\)\), "\[IndentingNewLine]", 
    \(% // MatrixForm\)}], "Input"],

Cell["\<\
The matrix d is the covariance matrix for the transformed data. Because d is \
diagonal, y1 and y2 are uncorrelated.\
\>", "Text"],

Cell[BoxData[
    \(d // MatrixForm\)], "Input"],

Cell[BoxData[{
    \(\(v1 = {1, 0};\)\), "\[IndentingNewLine]", 
    \(\(v2 = {0, 1};\)\), "\[IndentingNewLine]", 
    \(\(o = {0, 0};\)\[IndentingNewLine]\), "\[IndentingNewLine]", 
    \(\(DisplayTogether[\[IndentingNewLine]ListPlot[Transpose[Transpose[p] . b], \[IndentingNewLine]ImageSize \[Rule] 400, \[IndentingNewLine]AxesLabel \[Rule] {"\<y1\>", "\<y2\>"}, \[IndentingNewLine]PlotStyle \[Rule] {PointSize[0.02], Red}], \[IndentingNewLine]Show[showColorfulVectors[{{o, 10\ v1}, {o, 10\ v2}}, ManganeseBlue]]];\)\)}], "Input"],

Cell["\<\
Calculate the percentage of the total variance contained in the \
first principal component. Tr[d] is the trace of the matrix d.\
\>", "Text"],

Cell[BoxData[{
    \(\(\(firstPC = u1\)\(\[IndentingNewLine]\)\)\), "\[IndentingNewLine]", 
    \(percentVarianceFirstPC = 100\ d[\([1, 1]\)]/Tr[d]\)}], "Input"]
}, Open  ]],

Cell["\<\
The first principal component accounts for 98.4% of the variance in \
this data.\
\>", "Text"]
},
FrontEndVersion->"5.1 for Macintosh",
ScreenRectangle->{{4, 1280}, {0, 832}},
CellGrouping->Manual,
WindowSize->{783, 791},
WindowMargins->{{63, Automatic}, {Automatic, 0}},
PrintingCopies->1,
PrintingPageRange->{1, Automatic},
CellLabelAutoDelete->True,
StyleDefinitions -> "Report.nb"
]

(*******************************************************************
End of Mathematica Notebook file.
*******************************************************************)