This functions gives the exact same result as eigensoft iff you include the same SNPs in the analysis (defaults in eigensoft is to remove SNPs with less than two minor alleles).

Functions

eigenstrat<-function(geno){                 #snp x ind matrix of genotypes \in 0,1,2
  nMis<-rowSums(is.na(geno))
  geno<-geno[nMis==0,]                      #remove snps with missing data
  avg<-rowSums(geno)/ncol(geno)             # get allele frequency times 2
  keep<-avg!=0&avg!=2                       # remove sites with non-polymorphic data
  avg<-avg[keep]
  geno<-geno[keep,]
  snp<-nrow(geno)                           #number of snps used in analysis
  ind<-ncol(geno)                           #number of individuals used in analuysis
  freq<-avg/2                               #frequency
  M <- (geno-avg)/sqrt(freq*(1-freq))       #normalize the genotype matrix
  X<-t(M)%*%M                               #get the (almost) covariance matrix
  X<-X/(sum(diag(X))/(snp-1))
  E<-eigen(X)

  mu<-(sqrt(snp-1)+sqrt(ind))^2/snp         #for testing significance (assuming no LD!)
  sigma<-(sqrt(snp-1)+sqrt(ind))/snp*(1/sqrt(snp-1)+1/sqrt(ind))^(1/3)
  E$TW<-(E$values[1]*ind/sum(E$values)-mu)/sigma
  E$mu<-mu
  E$sigma<-sigma
  class(E)<-"eigenstrat"
  E
}
plot.eigenstrat<-function(x,col=1,...)
  plot(x$vectors[,1:2],col=col,...)

print.eigenstrat<-function(x)
  cat("statistic",x$TW,"\n")

Example

ind<-c(20,20)
snp<-10000
freq=c(0.2,0.25)
geno<-c()
for(pop in 1:length(ind))
  geno<-rbind(geno,matrix(rbinom(snp*ind[pop],2,freq[pop]),ind[pop]))
geno<-t(geno)
e<-eigenstrat(geno)

plot(e,col=rep(1:length(ind),ind),xlab="PC1",ylab="PC2")