Weir and Cockerham 1984
The same as the faster "fstat" from the geneland package but this script also gives the total variance, variance within individuals, variance within population and variance between populations both on a SNP level and on as a joint estimate.
WC84<-function(x,pop){ #number ind each population n<-table(pop) #number of populations npop<-nrow(n) #average sample size of each population n_avg<-mean(n) #total number of samples N<-length(pop) #frequency in samples p<-apply(x,2,function(x,pop){tapply(x,pop,mean)/2},pop=pop) #average frequency in all samples (apply(x,2,mean)/2) p_avg<-as.vector(n%*%p/N ) #the sample variance of allele 1 over populations s2<-1/(npop-1)*(apply(p,1,function(x){((x-p_avg)^2)})%*%n)/n_avg #average heterozygouts # h<-apply(x==1,2,function(x,pop)tapply(x,pop,mean),pop=pop) #average heterozygote frequency for allele 1 # h_avg<-as.vector(n%*%h/N) #faster version than above: h_avg<-apply(x==1,2,sum)/N #nc (see page 1360 in wier and cockerhamm, 1984) n_c<-1/(npop-1)*(N-sum(n^2)/N) #variance betwen populations a <- n_avg/n_c*(s2-(p_avg*(1-p_avg)-(npop-1)*s2/npop-h_avg/4)/(n_avg-1)) #variance between individuals within populations b <- n_avg/(n_avg-1)*(p_avg*(1-p_avg)-(npop-1)*s2/npop-(2*n_avg-1)*h_avg/(4*n_avg)) #variance within individuals c <- h_avg/2 #inbreedning (F_it) F <- 1-c/(a+b+c) #(F_st) theta <- a/(a+b+c) #(F_is) f <- 1-c(b+c) #weigted average of theta theta_w<-sum(a)/sum(a+b+c) list(F=F,theta=theta,f=f,theta_w=theta_w,a=a,b=b,c=c,total=c+b+a) } #example of use nsnp=10000 x<-matrix(rbinom(160*nsnp,2,0.02),160) pop<-rep(1:4,40) res<-WC84(x,pop) res$theta
to get the Fst for single sites use
nsnp=10000 x<-matrix(rbinom(160*nsnp,2,0.02),160) pop<-rep(1:4,40) res<-WC84(x,pop) with(res,a/(a+b+c))
WEIR, B. S., and C. C. COCKERHAM, 1984 Estimating F-statistics for the analysis of population structure. Evolution 38: 1358–1370
Weir and Hill 2002
WH02<-function(x,pop){ #moment estimation of fst using weir and hall 2002 n<-table(pop)#number ind each population npop<-nrow(n) N<-length(pop) #total number of samples p_1<-apply(x,2,function(x,pop){tapply(x,pop,mean)/2},pop=pop) #frequency in samples p_1_avg<-n%*%p_1/N #average frequency in all samples (apply(x,2,mean)/2) msp_1<-1/(npop-1)*(apply(p_1,1,function(x){((x-p_1_avg)^2)})%*%n) msg_1<-1/(N-npop)*(apply(p_1,1,function(x){x*(1-x)})%*%n) p_2<-1-p_1 p_2_avg<-n%*%p_2/N #average frequency in all samples (apply(x,2,mean)/2) msp_2<-1/(npop-1)*(apply(p_2,1,function(x){((x-p_2_avg)^2)})%*%n) msg_2<-1/(N-npop)*(apply(p_2,1,function(x){x*(1-x)})%*%n) n_c<-1/(npop-1)*(N-sum(n^2)/N) theta<-sum(msp_1-msg_1+msp_2-msg_2)/sum(msp_1+(n_c-1)*msg_1+msp_2+(n_c-1)*msg_2) return(theta) } x<-matrix(rbinom(1600,2,0.02),160) pop<-rep(1:4,40) WH02(x,pop)
WEIR, B. S., and W. G. HILL, 2002 Estimating F-statistics. Annu. Rev. Genet. 36: 721–750