Simulation of Treemix Estimators

1. Functions
2. Simulations
- 2.1. short branch
- 2.2. long branch
3. Real Dataset
- 3.1. 1000 Genomes
- 3.2. Human origins

1. Functions

using Distributions
using LinearAlgebra
using Base.Threads
using CodecZlib
using Statistics
using StatsPlots
using DelimitedFiles
using NamedArrays

function hatV(X)
    m, n = size(X)
    Vis = Array{Float64}(undef, m, m, n)
    for i in 1:n
        Vis[:,:,i] = X[:,i] * X[:,i]' - mean(X[:,i])^2 * ones(m,m)
    end
    return mean(Vis, dims=3)[:,:,1]
end

function hatΣ(X)
    m, n = size(X)
    novertwofloored = n÷2
    return 1/(2*novertwofloored)*sum((X[:,2k]-X[:,2k-1])*(X[:,2k]-X[:,2k-1])' for k in 1:novertwofloored)
end

function hatΣ(X1, X2)
    m1, n1 = size(X1)
    m2, n2 = size(X2)
    m1 == m2 || throw(DimensionMismatch("X1 and X2 should have the same number of rows."))
    n = min(n1, n2)
    return 1/(2*n)*sum((X1[:,k]-X2[:,k])*(X1[:,k]-X2[:,k])' for k in 1:n)
end

function hatW(X)
    m, n = size(X)
    Wis = Array{Float64}(undef, m, m, n)
    M = (I-ones(m,m)/m)
    for i in 1:n
        Wis[:,:,i] = M * X[:,i] * X[:,i]' * M
    end
    return mean(Wis, dims=3)[:,:,1]
end

function divideintwo(v)
    k0 = length(v)÷2
    for k in 1:length(v)
        if sum(v[1:k]) > sum(v[k+1:end])
            k0 = k
            break
        end
    end
    if sum(v[1:k0-1]) > sum(v[k0:end])
        return k0 - 1
    else
        return k0
    end
end

for func in (:hatW, :hatV, :hatΣ)
    @eval begin
        function ($func)(X::Vector{Matrix{Float64}})
            m = size(X[1],1)
            for index in 2:length(X)
                m == size(X[index],1) || throw(DimensionMismatch("Each matrix in X should have the same number of rows."))
            end
            ns = [size(X[k], 2) for k in eachindex(X)]
            index = divideintwo(ns)
            X1 = hcat(X[1:index]...)
            X2 = hcat(X[index+1:end]...)
            return ($func)(X1,X2)
        end
    end
end

for func in (:hatW, :hatV)
    @eval begin
        function ($func)(X1,X2)
            m1, n1 = size(X1)
            m2, n2 = size(X2)
            m1 == m2 || throw(DimensionMismatch("X1 and X2 should have the same number of rows."))
            n = min(n1, n2)
            X = [X1[:,1:n] X2[:,1:n]]
            return ($func)(X)
        end
    end
end

function trueW(Σ)
    m = size(Σ, 1)
    M = (I-ones(m,m)/m)
    return M * Σ * M
end

function trueV(Σ)
    m = size(Σ,1)
    return Σ - mean(Σ) * ones(m,m)
end


function find2partitions(n)
    n ≥ 1 || throw(AssertionError("n should be at least 1."))
    all2partitions=Vector{Vector{Int}}[] # empty set of partitions
    for i in 1:2^(n-1)-1 # gives strings "0...01" to "01...1".
        # Note that "01001" and the logical complement "10110"
        # give rise to the same partition. Hence we sum to 2^(n-1)-1.
        s = string(i, base=2, pad=n) # padded to a string of size n, by
        # adding zeros at the left.
        A = Int[] # empty set
        B = Int[] # empty set
        for j in 1:n
            if s[j] == '0'
                push!(A, j)
            else
                push!(B, j)
            end
        end
        push!(all2partitions, [A, B])
    end
    @assert length(all2partitions) == n2partitions(n)
    return all2partitions
end

function n2partitions(n)
    if n < 1
        throw(AssertionError("n should be at least 1"))
    elseif n == 1
        return 0
    elseif n == 2
        return 1
    elseif iseven(n)
        return sum(binomial(n,k) for k in 1:(n-1)÷2) + binomial(n,n÷2)÷2 # binomial(n,n/2) counts
        # partitions with sets of size n/2 and n/2 double.
    else
        return sum(binomial(n,k) for k in 1:n÷2)
    end
end

function findMaximisingPartitionInV(V)
    m = size(V, 1)
    all2partitions = find2partitions(m)
    minimisingindex = argmin([mean(V[i,j] for i in partition[1] for j in partition[2]) for partition in all2partitions])
    return all2partitions[minimisingindex]
end

function printMaximisingPartitions(X, pops::Vector{String})
    O = findMaximisingPartitionInV(X)
    println("1st partition:", [ pops[i] for i in O[1] ])
    println("2nd partition:", [ pops[i] for i in O[2] ])
end

# parser for Treemix input format.
# pop1 pop2 pop3 pop4 pop5 pop6
# counts of ref allele,counts of alt allele ...
function parseTreemixInput(gzfile::String)
    X = Float64[]
    i, j, npop= 0, 0, 0
    pops = String[]
    println("the populations are: ")
    for line in eachline(GzipDecompressorStream(open(gzfile)))
        if i == 0
            println(line)
            pops = convert(Vector{String},split(line))
            npop = length(pops)
        else
            # fit the matrix
            tmp = split(line)
            @assert length(tmp) == npop
            # A = Int64[]
            # S = Int64[]
            for j in 1:npop
                tmp2 = split(tmp[j], ",")
                # push!(A, parse(Int, tmp2[2]))
                # push!(S, parse(Int, tmp2[1]) + parse(Int, tmp2[2]))
                push!(X, parse(Int, tmp2[2]) / (parse(Int, tmp2[1]) + parse(Int, tmp2[2])))
            end
            # for i in 1:length(A)
            #     # push!(X, a)
            #     push!(X, A[i] * sum(S) / S[i])
            # end
        end
        i += 1
    end

    nsnp = Int(length(X) / npop)
    O = NamedArray(reshape(X, (npop, nsnp)))
    setnames!(O, pops, 1)
    return O
end

function parseMsOut(gzfile::String)
    nsam, nrep, npop, nind = 0, 0, 0, 0
    X = Int64[]  # msnp x nind
    XX = Matrix{Float64}[]
    start = false
    println("started parsing ms output file")
    io = GzipDecompressorStream(open(gzfile))
    for line in eachline(io)
        if startswith(line, "ms")
            cmd = split(line)
            nind = parse(Int, cmd[2])    # total number of samples
            nrep = parse(Int, cmd[3])
            npop = parse(Int, cmd[10])   # assume each pop has the same number of inds
            nsam = Int(nind / npop)      # samples in each pop
        end
        if startswith(line, "positions")
            start = true
            continue
        end
        if start
            # parse the context into XX
            # when hit the blank line, set start = false
            if line == "" || eof(io)
                if eof(io)
                    for i in line
                        push!(X, parse(Int, i))
                    end
                end
                start = false
                msnp = Int(length(X) / nind)
                X = reshape(X, (msnp, nind))
                X2 = Int64[]  # npop, msnp
                for i in 1:npop
                    s = (i - 1) * nsam  + 1
                    e = i * nsam
                    append!(X2, sum(X[:, s:e], dims=2))
                end
                X2 = transpose(reshape(X2, (msnp, npop)))
                push!(XX, X2)      # no normalize
                # push!(XX, X2 / nind)
                X = Int64[]
            else
                for i in line
                    push!(X, parse(Int, i))
                end
            end
        end
    end
    return XX
end

function norm4plot!(X)
    X[diagind(X)] .= NaN
    n = size(X, 1)
    for i in 1:n-1
        X[i, :] .-= mean(X[i, i+1:n])
    end
    X = UpperTriangular(X) + transpose(UpperTriangular(X))
    return X
end


function plotCov(X, title=nothing, colorbar=false)
    C = cov(transpose(X))
    norm4plot!(C)
    heatmap(C, colorbar=colorbar, title = title)
end

function plotVh(X, title=nothing, colorbar=false)
    V = hatV(X)
    norm4plot!(V)
    heatmap(V, colorbar=colorbar, title = title)
end

function plotΣ(X, title=nothing, colorbar=false)
    Σ = hatΣ(X)
    norm4plot!(Σ)
    heatmap(Σ, colorbar=colorbar, title = title)
end

function plotWh(X, title=nothing, colorbar=false)
    Wh = hatW(X)
    norm4plot!(Wh)
    heatmap(Wh, colorbar=colorbar, title = title)
end

function plotWh2(X, title=nothing, colorbar=false)
    W = hatW(X)
    d = sqrt(inv(Diagonal(W)))
    Wh = d * W * d
    heatmap(Wh, colorbar=colorbar, title = title)
end

function saveAsTreemixInput(X, out)
    if typeof(X) == Matrix{Float64}
        C = X
    else
        C = X[1]
        for i in 2:size(X, 1)
            C = hcat(C, X[i])
        end
    end

    io = GzipCompressorStream(open(out, "w"))
    # io = open(out, "w")
    (n, m) = size(C)
    println(n, ",", m)
    for i in 1:n
        write(io, "p$i ")
    end
    write(io, "\n")
    # writedlm(io, transpose(X), " ")
    for i in 1:m
        for j in 1:n
            r = Int(C[j, i])
            a = n - r
            write(io, "$r,$a ")
        end
        write(io, "\n")
    end
    close(io)
end

function plotAll(X; kwargs...)
    if typeof(X) == Vector{Matrix{Float64}}
        X2 = X[1]
        for i in 2:size(X, 1)
            X2 = hcat(X2, X[i])
        end
        C = cov(transpose(X2))
    else
        C = cov(transpose(X))
    end
    W = hatW(X)
    V = hatV(X)
    Σ = hatΣ(X)
    plot(
        heatmap(C, title = "Cov";kwargs...),
        heatmap(W, title = "W";kwargs...),
        heatmap(V, title = "V";kwargs...),
        heatmap(Σ, title = "Σ";kwargs...),
        size = (1000, 1000)
    )
end

function plotAll2(X; kwargs...)
    if typeof(X) == Vector{Matrix{Float64}}
        X2 = X[1]
        for i in 2:size(X, 1)
            X2 = hcat(X2, X[i])
        end
        C = norm4plot!(cov(transpose(X2)))
    else
        C = norm4plot!(cov(transpose(X)))
    end
    W = norm4plot!(hatW(X))
    V = norm4plot!(hatV(X))
    Σ = norm4plot!(hatΣ(X))
    plot(
        heatmap(C, title = "Cov";kwargs...),
        heatmap(W, title = "W";kwargs...),
        heatmap(V, title = "V";kwargs...),
        heatmap(Σ, title = "Σ";kwargs...),
        size = (1000, 1000)
    )
end

println("load functions done")

load functions done

source("plotting_funcs.R")

2. Simulations

Simulation is created by ms program. The commands used by Treemix paper can be found here.

usage: ms nsam howmany
  Options:
	 -t theta   (this option and/or the next must be used. Theta = 4*N0*u )
	 -s segsites   ( fixed number of segregating sites)
	 -T          (Output gene tree.)
	 -F minfreq     Output only sites with freq of minor allele >= minfreq.
	 -r rho nsites     (rho here is 4Nc)
		 -c f track_len   (f = ratio of conversion rate to rec rate. tracklen is mean length.)
			 if rho = 0.,  f = 4*N0*g, with g the gene conversion rate.
	 -G alpha  ( N(t) = N0*exp(-alpha*t) .  alpha = -log(Np/Nr)/t
	 -I npop n1 n2 ... [mig_rate] (all elements of mig matrix set to mig_rate/(npop-1)
		 -m i j m_ij    (i,j-th element of mig matrix set to m_ij.)
		 -ma m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.)
		 -n i size_i   (popi has size set to size_i*N0
		 -g i alpha_i  (If used must appear after -M option.)
	   The following options modify parameters at the time 't' specified as the first argument:
	 -eG t alpha  (Modify growth rate of all pop's.)
	 -eg t i alpha_i  (Modify growth rate of pop i.)
	 -eM t mig_rate   (Modify the mig matrix so all elements are mig_rate/(npop-1)
	 -em t i j m_ij    (i,j-th element of mig matrix set to m_ij at time t )
	 -ema t npop  m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.)
	 -eN t size  (Modify pop sizes. New sizes = size*N0 )
	 -en t i size_i  (Modify pop size of pop i.  New size of popi = size_i*N0 .)
	 -es t i proportion  (Split: pop i -> pop-i + pop-npop, npop increases by 1.
		 proportion is probability that each lineage stays in pop-i. (p, 1-p are admixt. proport.
		 Size of pop npop is set to N0 and alpha = 0.0 , size and alpha of pop i are unchanged.
	 -ej t i j   ( Join lineages in pop i and pop j into pop j
		  size, alpha and M are unchanged.
	 -eA t popi num ( num Ancient samples from popi at time t
	  -f filename     ( Read command line arguments from file filename.)
	  -p n ( Specifies the precision of the position output.  n is the number of digits after the decimal.)
	  -a (Print out allele ages.)

For both short branch and long branch, we simulate 400 independent regions for 20 populations with 20 samples each population without migrations. ms 400 400 -t 200 -r 200 500000

We generated coalescent simulations from several histories; the basic structure was a set of 20 populations produced by a serial bottleneck model like that used by DeGiorgio et al. to model human history(see Figure 2A in Treemix paper). The parameters of the simulations were chosen to be reasonable for non-African human populations; we used an effective population size of 10,000, and a history where all 20 populations share a common ancestor 2000 generations in the past. Each individual simulation involved 400 regions of approximately 500 KB each, and thus recapitulated many aspects of real data, including hundreds of thousands of loci and the presence of linkage disequilibrium.

Explanation of the above ms commands: ms nsam nreps -t θ -r ρ nsites

ms will generate samples under a finite-sites uniform recombination model (Hud- son, 1983). In this model, the number of sites between which recombination can occur is finite and specified by the user (with the parameter nsites). Despite the finite number of sites between which recombination can occur, the muta- tion process is still assumed to occur according to the ”infinite-sites” model, in that no recurrent mutation occurs, and the positions of the mutations are specified on a continuous scale from zero to one, as in the no-recombination case. To include crossing-over (recombination) in the model, one uses the -r switch and specifies the cross-over rate parameter, ρ which is \(4N_{0}r\), where r is the probability of cross-over per generation between the ends of the locus being simulated. In addition, the user specifies the numbers of sites, nsites, between which recombination can occur. One should think of nsites as the number of base pairs in the locus. The default cross-over probability between adjacent base pairs is 10−8 per generation. The third parameter here is the mutation parameter, θ = \(4N_{0}\) , where \(N_{0}\) is the diploid population size and where μ is the neutral mutation rate for the entire locus.

In our cases, \(N_{0}=10000\), nsites=500000, \(p = 4 \times 10000 \times 10^{-8} \times 5 \times 10^{5} = 200\).

2.1. short branch

2.1.1. commands

Here we simulate 400 independent regions for 20 populations with 20 samples each population without migrations.

# output 400 regions, 400 samples in total and 20 samples each population
# -t theta (Theta = 4*N0*u)
# -r rho nsites     (rho here is 4Nc)
ms 400 400 -t 200 -r 200 500000 -I 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 -en 0.00270 20 0.025 -ej 0.00275 20 19 -en 0.00545 19 0.025 -ej 0.00550 19 18 -en 0.00820 18 0.025 -ej 0.00825 18 17 -en 0.01095 17 0.025 -ej 0.011 17 16 -en 0.01370 16 0.025 -ej 0.01375 16 15 -en 0.01645 15 0.025 -ej 0.01650 15 14 -en 0.01920 14 0.025 -ej 0.01925 14 13 -en 0.02195 13 0.025 -ej 0.02200 13 12 -en 0.02470 12 0.025 -ej 0.02475 12 11 -en 0.02745 11 0.025 -ej 0.02750 11 10 -en 0.03020 10 0.025 -ej 0.03025 10 9 -en 0.03295 9 0.025 -ej 0.03300 9 8 -en 0.03570 8 0.025 -ej 0.03575 8 7 -en 0.03845 7 0.025 -ej 0.03850 7 6 -en 0.04120 6 0.025 -ej 0.04125 6 5 -en 0.04395 5 0.025 -ej 0.04400 5 4 -en 0.04670 4 0.025 -ej 0.04675 4 3 -en 0.04945 3 0.025 -ej 0.04950 3 2 -en 0.05220 2 0.025 -ej 0.05225 2 1 | gzip -c > data/ms.sim1.out.gz

2.1.2. results

find the partitions combination with maximum mean of off-diagnal sub matrix.

file = "data/ms.sim1.out.gz"
X = parseMsOut(file)
println("the size of X is : ", size(X))
saveAsTreemixInput(X, "data/treemix.sim1.in.gz")
println("the results of partitioning by V:")
println(findMaximisingPartitionInV(hatV(X)))
println("the results of partitioning by Σ:")
println(findMaximisingPartitionInV(hatΣ(X)))
println("the results of partitioning by W:")
println(findMaximisingPartitionInV(hatW(X)))

started parsing ms output file
the size of X is : (400,)
20,1225747
the results of partitioning by V:
[[1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]
the results of partitioning by Σ:
[[1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]
the results of partitioning by W:
[[1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]

the original plot

plotAll(X, c=:redsblues, colorbar=false)

ori.cov1.svg

plots to show the gradients

plotAll2(X, c=:redsblues, colorbar=false)

2.1.3. Treemix plots

## system("treemix -i data/treemix.sim1.in.gz -o data/treemix.sim1.out -root p1 ", ignore.stdout = T, ignore.stderr = F)
tm = plot_tree("data/treemix.sim1.out", plus = 0.001, disp = 0.0001)

2.2. long branch

2.2.1. commands

In contrast to short branch, we multiplied all branch lengths by 50. We also simulate 400 independent regions for 20 populations with 20 samples each population without migrations.

ms 400 400 -t 200 -r 200 500000 -I 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 -en 0.135 20 0.025 -ej 0.1375 20 19 -en 0.2725 19 0.025 -ej 0.275 19 18 -en 0.41 18 0.025 -ej 0.4125 18 17 -en 0.5475 17 0.025 -ej 0.55 17 16 -en 0.685 16 0.025 -ej 0.6875 16 15 -en 0.8225 15 0.025 -ej 0.825 15 14 -en 0.96 14 0.025 -ej 0.9625 14 13 -en 1.0975 13 0.025 -ej 1.1 13 12 -en 1.235 12 0.025 -ej 1.2375 12 11 -en 1.3725 11 0.025 -ej 1.375 11 10 -en 1.51 10 0.025 -ej 1.5125 10 9 -en 1.6475 9 0.025 -ej 1.65 9 8 -en 1.785 8 0.025 -ej 1.7875 8 7 -en 1.9225 7 0.025 -ej 1.925 7 6 -en 2.06 6 0.025 -ej 2.0625 6 5 -en 2.1975 5 0.025 -ej 2.2 5 4 -en 2.335 4 0.025 -ej 2.3375 4 3 -en 2.4725 3 0.025 -ej 2.475 3 2 -en 2.61 2 0.025 -ej 2.6125 2 1 | gzip -c >data/ms.sim2.out.gz

2.2.2. results

find the partitions combination with maximum mean of off-diagnal sub matrix.

file2 = "data/ms.sim2.out.gz"
X2 = parseMsOut(file2)
saveAsTreemixInput(X2, "data/treemix.sim2.in.gz")
println("the results of partitioning by V:")
println(findMaximisingPartitionInV(hatV(X2)))
println("the results of partitioning by Σ:")
println(findMaximisingPartitionInV(hatΣ(X2)))
println("the results of partitioning by W:")
println(findMaximisingPartitionInV(hatW(X2)))

started parsing ms output file
20,6530862
the results of partitioning by V:
[[1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]
the results of partitioning by Σ:
[[1], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]
the results of partitioning by W:
[[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]

the original plot

plotAll(X2, c=:redsblues, colorbar=false)

ori.cov2.svg

plots to show the gradients

plotAll2(X2, c=:redsblues, colorbar=false)

2.2.3. Treemix plots

## par(mfrow=c(1,2))
## system("treemix -i data/treemix.sim2.in.gz -o data/treemix.sim2.root.out -root p1 -k 100", ignore.stdout = T, ignore.stderr = F)
tm = plot_tree("data/treemix.sim2.root.out", plus = 0.002, disp = 0.0001)
## system("treemix -i data/treemix.sim2.in.gz -o data/treemix.sim2.out -k 100", ignore.stdout = T, ignore.stderr = F)
## tm = plot_tree("data/treemix.sim2.out", mbar = F, plus = 0.001, disp = 0.0001)

3. Real Dataset

3.1. 1000 Genomes

f = "data/1kg.6pops.in.gz"
Xk = parseTreemixInput(f)
println("the size of input matrix is ", size(Xk))
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xk), names(Xk, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xk), names(Xk, 1))

Xk2 = Xk[[1,2,3,5,6], :]
println("\nafter removing LWK from the input")
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xk2), names(Xk2, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xk2), names(Xk2, 1))

the populations are:
YRI CHB CDX LWK CEU FIN
the size of input matrix is (6, 4391887)
the results of partitioning by V:
1st partition:["YRI", "LWK"]
2nd partition:["CHB", "CDX", "CEU", "FIN"]
the results of partitioning by W:
1st partition:["YRI", "LWK"]
2nd partition:["CHB", "CDX", "CEU", "FIN"]

after removing LWK from the input
the results of partitioning by V:
1st partition:["YRI"]
2nd partition:["CHB", "CDX", "CEU", "FIN"]
the results of partitioning by W:
1st partition:["YRI", "CEU", "FIN"]
2nd partition:["CHB", "CDX"]

plotAll(Xk, c=:redsblues, colorbar=false)

ori.kg.svg

plotAll2(Xk, c=:redsblues, colorbar=false)

## system("treemix -i data/1kg.6pops.in.gz -o data/treemix.kg.6pops.out -root YRI ", ignore.stdout = T, ignore.stderr = F)
tm = plot_tree("data/treemix.kg.6pops.out", plus = 0.003, disp = 0.0001)

3.2. Human origins

f = "data/treemix.origins.sub1.gz"
Xo = parseTreemixInput(f)
println("the size of input matrix is ", size(Xo))
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xo), names(Xo, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xo), names(Xo, 1))
Xo2 = Xo[1:8, :]
println("after removing Mozabite [9] from the input")
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xo2), names(Xo2, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xo2), names(Xo2, 1))

Xo2 = Xo[[1,2,3,4,5,6,7,9], :]
println("\nafter removing Ju_hoan_North [8] from the input")
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xo2), names(Xo2, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xo2), names(Xo2, 1))

Xo2 = Xo[1:7, :]
println("\nafter removing Ju_hoan_North [8] and Mozabite [9] from the input")
println("the results of partitioning by V:")
printMaximisingPartitions(hatV(Xo2), names(Xo2, 1))
println("the results of partitioning by W:")
printMaximisingPartitions(hatW(Xo2), names(Xo2, 1))

plotAll(Xo, c=:redsblues, colorbar=false)

ori.origins.sub1.svg

plotAll2(Xo, c=:redsblues, colorbar=false)

origins.sub1.svg

Treemix inference

par(mfrow=c(1,2))
system("treemix -i data/treemix.origins.sub2.gz -o data/treemix.origins.sub2.out -root Mbuti ", ignore.stdout = T, ignore.stderr = F)
tm = plot_tree("data/treemix.origins.sub2.out")
system("treemix -i data/treemix.origins.sub1.gz -o data/treemix.origins.sub1.out -root Mbuti ", ignore.stdout = T, ignore.stderr = F)
tm = plot_tree("data/treemix.origins.sub1.out")