ANGSD: Analysis of next generation Sequencing Data
The latest tar.gz version is 0.938 (0.939 on GitHub); see Change_log for the changes, and download it here.
Korneliussen2013: Difference between revisions
Jump to navigation
Jump to search
(Created page with "Thorfinn Sand Korneliussen, Ida Moltke, Anders Albrechtsen and Rasmus Nielsen; Calculation of Tajima's D and other neutrality test statistics from low depth next-generation se...") |
No edit summary |
||
(2 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
Thorfinn Sand Korneliussen, Ida Moltke, Anders Albrechtsen and Rasmus Nielsen; Calculation of Tajima's D and other neutrality test statistics from low depth next-generation sequencing data; | Thorfinn Sand Korneliussen, Ida Moltke, Anders Albrechtsen and Rasmus Nielsen; Calculation of Tajima's D and other neutrality test statistics from low depth next-generation sequencing data; | ||
A description of how to use the method is available here: [[Tajima]] | |||
=Open Access= | |||
http://www.biomedcentral.com/1471-2105/14/289 | |||
=Bibtex= | =Bibtex= | ||
<pre> | <pre> | ||
@article{korneliussen_calculation_2013, | |||
  author   = {Korneliussen, Thorfinn Sand and Moltke, Ida and Albrechtsen, Anders and Nielsen, Rasmus}, | |||
  title    = {Calculation of {Tajima's D} and other neutrality test statistics from low depth next-generation sequencing data}, | |||
  journal  = {{BMC} Bioinformatics}, | |||
  year     = {2013}, | |||
  volume   = {14}, | |||
  number   = {1}, | |||
  pages    = {289}, | |||
  issn     = {1471-2105}, | |||
  doi      = {10.1186/1471-2105-14-289}, | |||
  url      = {http://www.biomedcentral.com/1471-2105/14/289}, | |||
  abstract = {BACKGROUND: A number of different statistics are used for detecting natural selection using {DNA} sequencing data, including statistics that are summaries of the frequency spectrum, such as Tajima's D. These statistics are now often being applied in the analysis of Next Generation Sequencing ({NGS}) data. However, estimates of frequency spectra from {NGS} data are strongly affected by low sequencing coverage; the inherent technology dependent variation in sequencing depth causes systematic differences in the value of the statistic among genomic regions. RESULTS: We have developed an approach that accommodates the uncertainty of the data when calculating site frequency based neutrality test statistics. A salient feature of this approach is that it implicitly solves the problems of varying sequencing depth, missing data and avoids the need to infer variable sites for the analysis and thereby avoids ascertainment problems introduced by a {SNP} discovery process. CONCLUSION: Using an empirical Bayes approach for fast computations, we show that this method produces results for low-coverage {NGS} data comparable to those achieved when the genotypes are known without uncertainty. We also validate the method in an analysis of data from the 1000 genomes project. The method is implemented in a fast framework which enables researchers to perform these neutrality tests on a genome-wide scale.}, | |||
} | |||
</pre> | </pre> | ||
Latest revision as of 14:03, 3 October 2013
Thorfinn Sand Korneliussen, Ida Moltke, Anders Albrechtsen and Rasmus Nielsen; Calculation of Tajima's D and other neutrality test statistics from low depth next-generation sequencing data;
A description of how to use the method is available here: Tajima
Open Access
http://www.biomedcentral.com/1471-2105/14/289
Bibtex
@article{korneliussen_calculation_2013,
  author   = {Korneliussen, Thorfinn Sand and Moltke, Ida and Albrechtsen, Anders and Nielsen, Rasmus},
  title    = {Calculation of {Tajima's D} and other neutrality test statistics from low depth next-generation sequencing data},
  journal  = {{BMC} Bioinformatics},
  year     = {2013},
  volume   = {14},
  number   = {1},
  pages    = {289},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-14-289},
  url      = {http://www.biomedcentral.com/1471-2105/14/289},
  abstract = {BACKGROUND: A number of different statistics are used for detecting natural selection using {DNA} sequencing data, including statistics that are summaries of the frequency spectrum, such as Tajima's D. These statistics are now often being applied in the analysis of Next Generation Sequencing ({NGS}) data. However, estimates of frequency spectra from {NGS} data are strongly affected by low sequencing coverage; the inherent technology dependent variation in sequencing depth causes systematic differences in the value of the statistic among genomic regions. RESULTS: We have developed an approach that accommodates the uncertainty of the data when calculating site frequency based neutrality test statistics. A salient feature of this approach is that it implicitly solves the problems of varying sequencing depth, missing data and avoids the need to infer variable sites for the analysis and thereby avoids ascertainment problems introduced by a {SNP} discovery process. CONCLUSION: Using an empirical Bayes approach for fast computations, we show that this method produces results for low-coverage {NGS} data comparable to those achieved when the genotypes are known without uncertainty. We also validate the method in an analysis of data from the 1000 genomes project. The method is implemented in a fast framework which enables researchers to perform these neutrality tests on a genome-wide scale.},
}
doi
doi:10.1186/1471-2105-14-289