| 
				   | 
				
| (37 intermediate revisions by the same user not shown) | 
| Line 1: | 
Line 1: | 
 |  | ;THIS PAGE IS OBSOLETE, PLEASE SEE FST AND PCA in the sidebar for the latest versions  | 
 | Matteo Fumagalli has been working on methods to estimate Fst and doing PCA/Covariance based on ANGSD output files.  |  | Matteo Fumagalli has been working on methods to estimate Fst and doing PCA/Covariance based on ANGSD output files.  | 
 | 
  |  | 
  | 
 | The main documentation for this is found here:  |  | The main documentation for this is found here:  | 
 | https://github.com/mfumagalli/ngsTools  |  | https://github.com/mfumagalli/ngsTools  | 
 | 
  |  | 
 | For the new fancy fst estimation, you should use the latests github version.
  |  | 
 | 
  |  | 
  | 
 | =Fst=  |  | =Fst=  | 
| Line 17: | 
Line 16: | 
 | =cite=  |  | =cite=  | 
 | If you use these methods, you should cite the m. fumagalli paper http://www.ncbi.nlm.nih.gov/pubmed/23979584  |  | If you use these methods, you should cite the m. fumagalli paper http://www.ncbi.nlm.nih.gov/pubmed/23979584  | 
 | 
  |  | 
 | =New Fancy Method (sim data)=
  |  | 
 | <pre>
  |  | 
 |   nRep <- 100
  |  | 
 |        nPop1 <- 24
  |  | 
 |        nPop2 <- 16
  |  | 
 |        cmd <- paste("msms -ms",nPop1+nPop2,nRep,"-t 930 -r 400 -I 2",nPop1,nPop2,"0 -g 1 9.70406 -n 1 2 -n 2 1 -ma x 0.0 0.0 x -ej 0.07142857 2 1  >msoutput.txt ",sep=" ")
  |  | 
 |        system(cmd)
  |  | 
 |        ##system("msms -ms 40 1 -t 930 -r 400 -I 2 20 20 0 -g 1 9.70406 -n 1 2 -n 2 1 -ma x 0.0 0.0 x -ej 0.07142857 2 1  >msoutput.txt  ")
  |  | 
 |  source("../R/readms.output.R")
  |  | 
 |   to2dSFS <- function(p1.d,p2.d,nPop1,nPop2)
  |  | 
 |         sapply(0:nPop1,function(x) table(factor(p2.d[p1.d==x],levels=0:nPop2)))
  |  | 
 | 
  |  | 
 |   source("../R/readms.output.R")
  |  | 
 |         a<- read.ms.output(file="msoutput.txt")
  |  | 
 |         
  |  | 
 |         p1.d <- unlist((sapply(a$gam,function(x) colSums(x[1:nPop1,]))))
  |  | 
 |         p2.d <- unlist((sapply(a$gam,function(x) colSums(x[-c(1:nPop1),]))))
  |  | 
 |         par(mfrow=c(1,2))
  |  | 
 |         barplot(table(p1.d))
  |  | 
 |         barplot(table(p2.d))
  |  | 
 |         sfs.2d <- t(sapply(0:nPop1,function(x) table(factor(p2.d[p1.d==x],levels=0:nPop2))))
  |  | 
 |    system("../misc/msToGlf -in msoutput.txt -out raw -singleOut 1 -regLen 0 -depth 8 -err 0.005")
  |  | 
 |        system("../misc/splitgl raw.glf.gz 20 1 12 >pop1.glf.gz")
  |  | 
 |        system("../misc/splitgl raw.glf.gz 20 13 20 >pop2.glf.gz")
  |  | 
 |        system("echo \"1 250000000\" >fai.fai")
  |  | 
 |        system("../angsd -glf pop1.glf.gz -nind 12 -doSaf 1 -out pop1 -fai fai.fai -issim 1")
  |  | 
 |        system("../angsd -glf pop2.glf.gz -nind 8 -doSaf 1 -out pop2 -fai fai.fai -issim 1")
  |  | 
 |        system("../misc/realSFS pop1.saf.idx >pop1.saf.idx.ml")
  |  | 
 |        system("../misc/realSFS pop2.saf.idx >pop2.saf.idx.ml")
  |  | 
 |        system("../misc/realSFS pop1.saf.idx pop2.saf.idx -maxIter 500 -p 20  >pop1.pop2.saf.idx.ml")
  |  | 
 | getFst<-function(est){
  |  | 
 |     N1<-nrow(est)-1
  |  | 
 |     N2<-ncol(est)-1
  |  | 
 |     cat("N1: ",N1 ," N2: ",N2,"\n")
  |  | 
 |     est0<-est
  |  | 
 |     est0[1,1]<-0
  |  | 
 |     est0[N1+1,N2+1]<-0
  |  | 
 |     est0<-est0/sum(est0)
  |  | 
 |     
  |  | 
 |     aMat<<-matrix(NA,nrow=N1+1,ncol=N2+1)
  |  | 
 |     baMat<<-matrix(NA,nrow=N1+1,ncol=N2+1)
  |  | 
 |     for(a1 in 0:(N1))
  |  | 
 |         for(a2 in 0:(N2)){
  |  | 
 |             p1 <- a1/N1
  |  | 
 |             p2 <- a2/N2
  |  | 
 |             q1 <- 1 - p1
  |  | 
 |             q2 <- 1 - p2
  |  | 
 |             alpha1 <- 1 - (p1^2 + q1^2)
  |  | 
 |             alpha2 <- 1 - (p2^2 + q2^2)
  |  | 
 |             
  |  | 
 |             al <-  1/2 * ( (p1-p2)^2 + (q1-q2)^2) - (N1+N2) *  (N1*alpha1 + N2*alpha2) / (4*N1*N2*(N1+N2-1))
  |  | 
 |             bal <- 1/2 * ( (p1-p2)^2 + (q1-q2)^2) + (4*N1*N2-N1-N2)*(N1*alpha1 + N2*alpha2) / (4*N1*N2*(N1+N2-1))
  |  | 
 |             aMat[a1+1,a2+1]<<-al
  |  | 
 |             baMat[a1+1,a2+1]<<-bal
  |  | 
 |             ##  print(signif(c(a1=a1,a2=a2,p1=p1,p2=p2,al1=alpha1,al2=alpha2,al),2))
  |  | 
 |         }
  |  | 
 |     ## unweighted average of single-locus ratio estimators
  |  | 
 |     fstU <-   sum(est0*(aMat/baMat),na.rm=T)
  |  | 
 |     ## weighted average of single-locus ratio estimators
  |  | 
 |     fstW <-   sum(est0*aMat,na.rm=T)/sum(est0*baMat,na.rm=T)
  |  | 
 |     c(fstW=fstW,fstU=fstU)
  |  | 
 | }
  |  | 
 | 
  |  | 
 | > getFst(sfs.2d)
  |  | 
 | N1:  24  N2:  16 
  |  | 
 |       fstW       fstU 
  |  | 
 | 0.11945801 0.08249571 
  |  | 
 | 
  |  | 
 | est <- matrix(as.integer(scan("pop1.pop2.saf.idx.ml")),byrow=T,ncol=nPop2+1)
  |  | 
 | > getFst(est)
  |  | 
 | N1:  24  N2:  16 
  |  | 
 |       fstW       fstU 
  |  | 
 | 0.11925903 0.08241461 
  |  | 
 | 
  |  | 
 | cmd<"fst index pop1.saf.idx pop2.saf.idx -sfs pop1.pop2.saf.idx.ml  -fstout testing"
  |  | 
 | system(cmd)
  |  | 
 | 
  |  | 
 | ##view the per site 'alpha' 'beta' if you want
  |  | 
 | cmd<-"../misc/realSFS fst print testing.fst.idx |head"
  |  | 
 | 
  |  | 
 | ##use fancy new emperical bayes
  |  | 
 | cmd<- "../misc/realSFS fst stats testing.fst.idx "
  |  | 
 | system(cmd)
  |  | 
 | -> FST.Unweight:0.083316 Fst.Weight:0.119372
  |  | 
 | </pre>
  |  | 
 | 
  |  | 
 | =new fancy version=
  |  | 
 | 
  |  | 
 | <pre>
  |  | 
 | #this is with 2pops
  |  | 
 | ../angsd -b list1  -anc hg19ancNoChr.fa -out pop1 -dosaf 1 -gl 1
  |  | 
 | ../angsd -b list2  -anc hg19ancNoChr.fa -out pop1 -dosaf 1 -gl 1
  |  | 
 | ../misc/realSFS pop1.saf.idx pop2.saf.idx >pop1.pop2.ml
  |  | 
 | ../misc/realSFS fst index pop1.saf.idx pop2.saf.idx -sfs pop1.pop2.ml -fstout here
  |  | 
 | ../misc/realSFS fst stats here.fst.idx 
  |  | 
 | -> FST.Unweight:0.069395 Fst.Weight:0.042349
  |  | 
 | =3pops=
  |  | 
 | <pre>
  |  | 
 | msms -ms 41 10 -t 930 -r 400 -I 3 11 13 17 -n 1 1.682020 -n 2 3.736830 -n 3 7.292050 -eg 0 2 116.010723 -eg 0 3 160.246047 -ma x 0.881098 0.561966 0.881098 x 2.797460 0.561966 2.797460 x -ej 0.028985 3 2 -en 0.028985 2 0.287184 -ema 0.028985 3 x 7.293140 x 7.293140 x x x x x -ej 0.197963 2 1 -en 0.303501 1 1
  |  | 
 | </pre>
  |  |