This functions gives the exact same result as eigensoft iff you include the same SNPs in the analysis (defaults in eigensoft is to remove SNPs with less than two minor alleles).
Functions
eigenstrat<-function(geno){ #snp x ind matrix of genotypes \in 0,1,2 nMis<-rowSums(is.na(geno)) geno<-geno[nMis==0,] #remove snps with missing data avg<-rowSums(geno)/ncol(geno) # get allele frequency times 2 keep<-avg!=0&avg!=2 # remove sites with non-polymorphic data avg<-avg[keep] geno<-geno[keep,] snp<-nrow(geno) #number of snps used in analysis ind<-ncol(geno) #number of individuals used in analuysis freq<-avg/2 #frequency M <- (geno-avg)/sqrt(freq*(1-freq)) #normalize the genotype matrix X<-t(M)%*%M #get the (almost) covariance matrix X<-X/(sum(diag(X))/(snp-1)) E<-eigen(X) mu<-(sqrt(snp-1)+sqrt(ind))^2/snp #for testing significance (assuming no LD!) sigma<-(sqrt(snp-1)+sqrt(ind))/snp*(1/sqrt(snp-1)+1/sqrt(ind))^(1/3) E$TW<-(E$values[1]*ind/sum(E$values)-mu)/sigma E$mu<-mu E$sigma<-sigma class(E)<-"eigenstrat" E } plot.eigenstrat<-function(x,col=1,...) plot(x$vectors[,1:2],col=col,...) print.eigenstrat<-function(x) cat("statistic",x$TW,"\n")
Example
ind<-c(20,20) snp<-10000 freq=c(0.2,0.25) geno<-c() for(pop in 1:length(ind)) geno<-rbind(geno,matrix(rbinom(snp*ind[pop],2,freq[pop]),ind[pop])) geno<-t(geno) e<-eigenstrat(geno) plot(e,col=rep(1:length(ind),ind),xlab="PC1",ylab="PC2")