# Simulate a dataset with 4 highly correlated variables ----
# Fix the RNG seed so the printed numbers are reproducible between runs.
set.seed(1)
n_obs <- 20
x <- rnorm(n_obs)
x <- cbind(
  v1 = x,
  v2 = x + rnorm(n_obs, sd = 0.02),
  v3 = x + rnorm(n_obs, sd = 0.01),
  v4 = x + rnorm(n_obs, sd = 0.03)
)

# Check the correlation matrix
cor(x)

# Variances of the original variables
apply(x, 2, var)

# Total variance
(total <- sum(apply(x, 2, var)))

# Compute PCA ----
# The data are centered (centering does not change any variance, and var()
# always centers) but NOT scaled, so the PCs partition the original total
# variance. With center = FALSE, sdev^2 would be an uncentered second moment
# and would not add up to `total`.
pca <- prcomp(x, center = TRUE, scale. = FALSE)
pca

# Variance explained by each PC
pca$sdev^2

# The same variances can be computed from the rotated data (PCA scores)
apply(pca$x, 2, var)

# The PC variances sum to the total variance of the original variables
sum(pca$sdev^2)

# Proportion of variance explained by each PC
pca$sdev^2 / total

# Correlation between the original variables and each PC ----
# Computed once from the fitted `pca` object instead of refitting prcomp()
# for every component.
var_pc_cor <- cor(x, pca$x)

# Correlation between the original variables and PC1
var_pc_cor[, "PC1"]
# Correlation between the original variables and PC2
var_pc_cor[, "PC2"]
# Correlation between the original variables and PC3
var_pc_cor[, "PC3"]
# Correlation between the original variables and PC4
var_pc_cor[, "PC4"]

# Compare the PCs with a "naive" coordinate: the mean across variables ----
average <- rowMeans(x)

# Variance of the average values
var(average)

# Raw ratio of var(average) to the total variance. rowMeans() weights each
# variable by 1 / p, which is not a unit-length direction, so this ratio is
# NOT on the same scale as the PC proportions above.
var(average) / total

# Proportion of variance explained by the mean direction. Rescaling the
# weights to unit length (1 / sqrt(p) each) makes it comparable with the PCs;
# it can never exceed the proportion explained by PC1.
average_score <- average * sqrt(ncol(x))
var(average_score) / total

# Correlation between the original variables and the average
apply(x, 2, cor, y = average)