// Demonstrates use of SVD to predict missing ratings import edu.gwu.lintool.*; import java.util.*; public class MovieRatingsSVD { public static void main (String[] argv) { // Rows are users, columns are movies. // Missing ratings are marked by 0 double[][] X = { // Movies: 1 2 3 4 5 6 1+2=action, 3+4=drama, 5+6=comedy {5, 4, 1, 1, 4, 0}, // User 1, users 1 & 2 are similar {4, 5, 1, 2, 0, 5}, // User 2 {0, 0, 4, 0, 5, 4}, // User 3 users 3 and 4 are similar {4, 0, 4, 0, 5, 0}, // User 4 {4, 4, 1, 0, 1, 1}, // User 5 5 & 6 are similar {5, 5, 0, 1, 2, 1}, // User 6 {3, 3, 3, 3, 3, 3}, // User 7 7 & 8 are similar {2, 3, 2, 3, 2, 3}, // User 8 {5, 0, 0, 0, 0, 0} // User 9 9 is very dissimilar from all }; directSVD (X); directSVD (fillColumnAverages(X)); } static void directSVD (double[][] X) { int m = X.length, n = X[0].length; // Step #1: Compute SVD. LinResult L = LinToolLibrary.computeSVDShortForm (X); // Print the sigma (middle) matrix: LinUtil.print ("Sigma", L.Sigma); // This stays the same through the loop. double[][] VT = MatrixTool.transpose (L.V); // Step #2: Compute the best approximation // Note: we leave out the last sigma // If we left in the last one, the full SVD is always the best. double minError = Double.MAX_VALUE; int which = 0; double[][] Sigma = copy (L.Sigma); Sigma[n-1][n-1] = 0; // Zero out the last. for (int i=n-2; i>=1; i--) { // Zero out Sigma[i][i] Sigma[i][i] = 0; // Multiply out to get new ratings matrix. double[][] U_Sigma = MatrixTool.matrixMult(L.U, Sigma); double[][] predicted = MatrixTool.matrixMult(U_Sigma, VT); // Compute error between X and predicted, and update the min double error = meanSquareError (X, predicted); if (error < minError) { minError = error; which = i; } } System.out.println ("Direct: Best=" + which); // Apply the best for (int i=which; i 0) { error += (X[i][j] - Y[i][j]) * (X[i][j] - Y[i][j]); } } } return (error / (m * n)); } static double[][] fillColumnAverages (double[][] X) { int m = X.length, n = X[0].length; // First, compute column (averages) double[] colAvg = new double[n]; for (int j=0; j 0) { colAvg[j] += X[i][j]; // Include only known ratings. count ++; } } colAvg[j] = colAvg[j] / count; } // Now make a new dataset replacing zeroes with averages. double[][] Y = new double [m][n]; for (int i=0; i 0) { Y[i][j] = X[i][j]; } else { Y[i][j] = colAvg[j]; } } } return Y; } static double[][] copy (double[][] X) { int m = X.length, n = X[0].length; double[][] Y = new double [m][n]; for (int i=0; i