pca.py
from numpy import *
import numpy as np
def loadDataSet(fileName, delim=' '):
    """Load a delimited text file of numbers into a NumPy matrix.

    Each non-blank line becomes one row; the line is stripped, split on
    ``delim`` and every field is converted with ``float``.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used on every line (a single space by default).

    Returns:
        A ``numpy.matrix`` with one row per non-blank line of the file.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # The context manager closes the handle even if parsing fails.
    with open(fileName) as fr:
        stripped = (line.strip() for line in fr)
        # Build real lists of floats: a lazy ``map`` object (Python 3) would
        # not be turned into numeric rows by NumPy.  Blank lines are skipped
        # because ``float('')`` would otherwise abort the whole load.
        datArr = [[float(field) for field in text.split(delim)]
                  for text in stripped if text]
    # ``asmatrix`` is the spelling that exists on every NumPy version; the
    # ``mat`` alias was removed in NumPy 2.0.
    return np.asmatrix(datArr)
def percentage2n(eigVals, percentage):
    """Return how many of the largest eigenvalues reach a share of their sum.

    The eigenvalues are sorted in descending order and accumulated until the
    running total is at least ``percentage`` times the sum of all of them.

    Args:
        eigVals: Sequence or array of eigenvalues.
        percentage: Target share of the total, as a fraction (e.g. ``0.95``).

    Returns:
        The number of leading eigenvalues needed to reach the target.  If the
        target is never reached (``percentage`` above 1, or floating-point
        rounding when it equals 1) the total number of eigenvalues is
        returned instead of falling through with ``None``.
    """
    descending = np.sort(eigVals)[::-1]
    threshold = np.sum(descending) * percentage
    running = 0
    for count, value in enumerate(descending, start=1):
        running += value
        if running >= threshold:
            return count
    # Threshold never met: keeping every eigenvalue is the closest answer and
    # keeps callers that compute ``n + 1`` from failing on ``None``.
    return len(descending)
def pca(dataMat, topNfeat=9999999):
    """Reduce data to its leading principal components.

    Rows of ``dataMat`` are treated as observations and columns as variables
    (the mean is taken along axis 0 and the covariance uses ``rowvar=0``).

    Args:
        dataMat: 2-D array or matrix of numeric data.
        topNfeat: Maximum number of components to keep.  The default is large
            enough to keep every component.

    Returns:
        A tuple ``(lowData_N, reconMat_N)``: the mean-centred data projected
        onto the retained eigenvectors, and the reconstruction of that
        projection in the original coordinates with the column means added
        back.
    """
    meanVals = np.mean(dataMat, axis=0)
    # Work with matrices so that ``*`` below is a matrix product.
    meanRemoved = np.asmatrix(dataMat - meanVals)
    covMat = np.asmatrix(np.cov(meanRemoved, rowvar=0))
    # A covariance matrix is symmetric, so use the symmetric solver: it
    # always returns real eigenvalues and orthonormal eigenvectors, whereas
    # the general ``eig`` can return complex pairs for rank-deficient data.
    eigVals, eigVects = np.linalg.eigh(covMat)
    eigVects = np.asmatrix(eigVects)
    # Indices of the eigenvalues from largest to smallest, cut to topNfeat.
    eigValInd = np.argsort(eigVals)[::-1][:topNfeat]
    redEigVects = eigVects[:, eigValInd]
    lowData_N = meanRemoved * redEigVects
    reconMat_N = (lowData_N * redEigVects.T) + meanVals
    return lowData_N, reconMat_N
def pcaPerc(dataMat, percentage=1):
    """Reduce data, keeping enough components to reach a share of the eigenvalue sum.

    Rows of ``dataMat`` are treated as observations and columns as variables
    (the mean is taken along axis 0 and the covariance uses ``rowvar=0``).
    The number of components is chosen by ``percentage2n``.

    Args:
        dataMat: 2-D array or matrix of numeric data.
        percentage: Target share of the eigenvalue sum, as a fraction
            (``1`` keeps everything).

    Returns:
        A tuple ``(lowData_P, reconMat_P)``: the mean-centred data projected
        onto the retained eigenvectors, and the reconstruction of that
        projection in the original coordinates with the column means added
        back.
    """
    meanVals = np.mean(dataMat, axis=0)
    # Work with matrices so that ``*`` below is a matrix product.
    meanRemoved = np.asmatrix(dataMat - meanVals)
    covMat = np.asmatrix(np.cov(meanRemoved, rowvar=0))
    # A covariance matrix is symmetric, so use the symmetric solver: it
    # always returns real eigenvalues and orthonormal eigenvectors, whereas
    # the general ``eig`` can return complex pairs for rank-deficient data.
    eigVals, eigVects = np.linalg.eigh(covMat)
    eigVects = np.asmatrix(eigVects)
    n = percentage2n(eigVals, percentage)
    if n is None:
        # Guard against a missing cutoff, which would break the slice below;
        # fall back to keeping every component.
        n = len(eigVals)
    # Indices of the n largest eigenvalues, largest first.
    n_eigValIndice = np.argsort(eigVals)[::-1][:n]
    n_eigVect = eigVects[:, n_eigValIndice]
    lowData_P = meanRemoved * n_eigVect
    reconMat_P = (lowData_P * n_eigVect.T) + meanVals
    return lowData_P, reconMat_P
readData.py
import matplotlib.pyplot as plt
from pylab import *
import numpy as np
import scipy.io as sio
def loadData(filename,mName):
    """Read a MATLAB ``.mat`` file and return one named variable from it.

    Args:
        filename: Path of the ``.mat`` file handed to ``scipy.io.loadmat``.
        mName: Name of the variable to pick out of the loaded contents.

    Returns:
        The value stored under ``mName`` in the loaded file.

    Raises:
        KeyError: If the file holds no variable called ``mName``.
    """
    contents = sio.loadmat(filename)
    return contents[mName]
main.py
import matplotlib.pyplot as plt
from pylab import *
import numpy as np
import scipy.io as sio
import pca
from numpy import mat,matrix
import scipy as sp
import readData
import pca
if __name__ == '__main__':
    # Load the variable 'A1' from the MATLAB data file.
    A1 = readData.loadData('6electrodes.mat', 'A1')
    # Reduce once with a fixed component count (30) and once with a
    # 0.95 eigenvalue-share threshold.
    lowData_N, reconMat_N = pca.pca(A1, 30)
    lowData_P, reconMat_P = pca.pcaPerc(A1, 0.95)
    # print(...) with a single argument produces the same output on
    # Python 2 and Python 3, unlike the bare print statement.
    for result in (lowData_N, reconMat_N, lowData_P, reconMat_P):
        print(shape(result))