/* This is HMMtest part of relateHMM last saved 25/03 2009 */ /* Copyright (C) 2009 Thorfinn Sand Korneliussen thorfinn@binf.ku.dk This file is part of Relate 0.98 HMMld is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Foobar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with HMMld. If not, see . */ /* This file is just a fast smackdown of the method! This source code is not up to my useal coding standard, Actually I'm somewhat embarrased. and if time permits, the code will be re-done */ #include #include #include #include #include #include #include #include #ifndef _types_h #define _types_h #include "types.h" #endif #include "alloc.h" #include "conf.h" using namespace std; iArray *affectList = NULL; iArray *nonAffectList = NULL; iArray *permAffectList = NULL; iArray *permNonAffectList = NULL; //iArray *dFileList = NULL; iArray *indexList = NULL; iMatrix *indMatrix = NULL; fMatrix *post = NULL; fMatrix *covarMatrix = NULL; fArray *K = NULL; fArray *position = NULL; int n0,n1; ///Converts a integer type to string. std::string to_string(int a){///@param a An integer to convert. std::string str; std::stringstream ss; ss<>str)){ printf("Error converting :%d will exit\n",a); exit(0); } ss>>str; return str; ///@return Returns the string value. } ///Converts a string to integer type. int to_int(std::string a){///@param a a String to convert. int str; float check; std::stringstream ss; std::stringstream ss2; ss<>str)){ printf("Error converting :\"%s\" will exit\n",a.c_str()); exit(0); } //now check if numberis really an int ss2<>check)){ printf("Error converting :\"%s\" will exit\n",a.c_str()); exit(0); } if(check!=str){ printf("String you want as in integer is not really an integer: \"%s\"\n",a.c_str()); puts("Just a warning"); } return str;///@return Function returns the integer value. } ///Convert string to float. float to_float(std::string a){///@param a is the string to convert. float str; std::stringstream ss; ss<>str)){ printf("Error converting :\"%s\" will exit\n",a.c_str()); exit(0); } ss>>str; return str;///@return The float value. } ///convert string to double double to_double(std::string a){///@param a is the string to convert. double str; std::stringstream ss; ss<>str)){ printf("Error converting :\"%s\" will exit\n",a.c_str()); exit(0); } ss>>str; return str;///@return The double value. } void typecast_stringarray_to_fArray(const std::vector &tokens,fArray *mat,int every){ for (unsigned int i=0;iarray[i/every] = to_float( tokens.at(i)); } void typecast_stringarray_to_fMatrix(const std::vector &tokens,fMatrix *mat){ int start=0; // printf("num element to typecast:%d\n",tokens.size()); fflush(stdout); for (int i=0;ix;i++) for(int j=0;jy;j++){ if((start%100000)==0){ printf("\r\t\t at:%d",start); fflush(stdout); } mat->matrix[i][j] = to_float( tokens.at(start)); start++; } } ///Mother-of-all string tokenizers! Function will split a string from a given set of delimiters. int get_lexemes(std::string str,std::vector &lexems,const std::string delims){ ///@param str The string to be splittet. ///@param lexems A reference to a list of strings in which the splittet string will be put as seperate elements. A queue. ///@param delims A string containing all the chars that the function will split by. char *token = strtok( const_cast( str.c_str() ),delims.c_str()); int numToks = 0; while ( token != NULL ){ numToks++; lexems.push_back( token); token = strtok( NULL,delims.c_str() ); } return numToks;///@return The number of strings that has been splittet. } ///Reads all lines in a file, splits it, and then input the values into the internal datastructure. fArray *readarray(std::string filename,const std::string delims=",;: \t",int every=1){ ///@param filename The name of the file to open. ///@param delims A string of delimeters to split by. std::vector tokens; const int SIZE=MAX_ELEMS_PER_LINE; char buffer[SIZE]; std::ifstream pFile (filename.c_str(),std::ios::in); if(!pFile){ std::cout <<"Problems opening file" <every; ///@param filename A filename to read.@param delim A string of delimiters. if(1){ printf("\t-> will try to open postfile: \"%s\" ... \n",filename.c_str()); fflush(stdout); } const int SIZE = MAX_ELEMS_PER_LINE; char buffer[SIZE]; std::ifstream pFile (filename.c_str(),std::ios::in); if(!pFile){ std::cout <<"Problems opening file" < checking dims of posterior matrix file:\n"); int numTrueRow=-1; while(!pFile.eof()){ numTrueRow++; std::vector tokens; std::string tmp_string; pFile.getline(buffer,SIZE); if ((numTrueRow%2)==1&&pars->k2!=0) continue; tmp_string = std::string(buffer); if(doFirstRow){ //if file has a emptystart line itemsInFirstRow = get_lexemes(tmp_string,tokens,delim); if (itemsInFirstRow==0) continue; // printf("items in first rwo:%d\n",itemsInFirstRow); doFirstRow=0; numRows++; } else{ int nItems = get_lexemes(tmp_string,tokens,delim); //if line is empty if(nItems==0) continue; numRows++; if(nItems!=itemsInFirstRow){ printf("row length mismatch at line:%d numitems is:%d shouldn't be:%d\t will exit\n",numRows,itemsInFirstRow,nItems); exit(0); } } if ((numRows%100 )==0){ printf("\r\t\tchecking number of items at line: %d ",numRows); fflush(stdout); } } pFile.close(); printf("\r\t-> File should contain numrows:%d\tnumitems:%d \n",numRows,itemsInFirstRow); fflush(stdout); float number_of_inds =0.5+sqrt(1.0/4.0+numRows*2); if(floor(number_of_inds)!=ceil(number_of_inds)){ printf("\t-> Possible error in posterior file, number of pairs run doesn't divide to int\n"); exit(0); } fMatrix *mat; int mods = (itemsInFirstRow % every); // std::cout << "mods: " << mods < tokens; std::string tmp_string; pFile2.getline(buffer,SIZE); if ((numTrueRow%2)==1&&pars->k2!=0) continue; tmp_string = std::string(buffer); int itemsInRow = get_lexemes(tmp_string,tokens,delim); if (itemsInRow==0) continue; //printf("\nnumRows=%d\titemsInRow=%d\tevery=%d\ttoksize:%d\n",numRows,itemsInRow,every,(int)tokens.size()); for(unsigned int i=0; i matrix[numRows][i/every] = to_float(tokens[i]); } numRows++; } pFile2.close(); //copy(tokens.begin(), tokens.end(), ostream_iterator(cout, ", ")); fflush(stdout); return mat; } /// Reads an entire file and returns a fMatrix. fMatrix *readmatrix_filthy_mem_user(std::string filename,const std::string delim=",;: \t"){ ///@param filename A filename to read.@param delim A string of delimiters. if(1){ printf("\t-> will try to open postfile: \"%s\" ... \n",filename.c_str()); fflush(stdout); } std::vector tokens; const int SIZE = MAX_ELEMS_PER_LINE; char buffer[SIZE]; std::ifstream pFile (filename.c_str(),std::ios::in); if(!pFile){ std::cout <<"Problems opening file" < Possible error in posterior file, number of pairs run doesn't divide to int. Maybe use -k2 1?\n"); exit(0); } fMatrix *mat = allocFloatMatrix(numRows,itemsInFirstRow); //now we have a token array of string coerce the types now printf("\ntypecasting\t and has alloc a mat of sise:(%d,%d)\n",numRows,itemsInFirstRow); typecast_stringarray_to_fMatrix(tokens,mat); //copy(tokens.begin(), tokens.end(), ostream_iterator(cout, ", ")); printf("typecats\n"); fflush(stdout); return mat; } void write_matrix(const char* str,fMatrix *mat){ ofstream myfile; myfile.open (str); for(int i=0;ix;i++){ for(int j=0;jy-1;j++) myfile <matrix[i][j]<<" "; myfile <matrix[i][mat->y-1]<<"\n"; } myfile.close(); } void write_array(const char* str,fArray *ary){ ofstream myfile; myfile.open (str); for(int i=0;ix-1;i++) myfile <array[i]<<"\t"; myfile <array[ary->x-1]<<"\n"; myfile.close(); } void generate_affectLists(iArray *boolList){ int len=0;//affectlist int len2=0;//nonaffectlist for (int i=0;ix;i++){ if(boolList->array[i]==1){ affectList->array[len] = i; len++; } else if (boolList->array[i]==2) { nonAffectList->array[len2] = i; len2++; } } } int fileExists(const char *pName){ ifstream pFile (pName); if(!pFile) return 0; return 1; } fMatrix *getCoVar(const char *pName) { if(1){ printf("\t-> Will try to open: \"%s \" ...",pName); fflush(stdout); } int ind=0; int snp=0; fMatrix *data; //change const int SIZE=MAX_ELEMS_PER_LINE; char buffer[SIZE]; char *buffer_ar; ifstream pFile (pName); if(!pFile){ cout <<"file doesn't exists will exit:" <matrix[i][s]=inDataSet; buffer_ar = strtok(NULL, " "); s++; } } pFile1.close(); if(1){ printf("done reading dims=(%d,%d)\n",data->x,data->y); } return data; } void fix_missing(){ printf("\n\t-> Will infer missing values in posterier matrix..."); fArray *difPos = allocFloatArray(position->x); //calculate differences in position; // cout<<"npos="<x<x;i++){ difPos->array[i-1]=position->array[i]-position->array[i-1]; // cout<array[i-1]<array,difPos->x); float dif1=0; float dif2=0; float difSum=0; int numberOfMissing=0; float firstNonMissing =0; float nextNonMissing =0; /* ok, this is what happens, we need to remove all NA / -1's, for each row 1. check from the left weather the first elem is NA 2. check from the right weather the last elem is NA 3. run through entire row */ for(int i=0;ix;i++) { // 1. if(post->matrix[i][0]==-1){ numberOfMissing =1; while(post->matrix[i][numberOfMissing]==-1 && (numberOfMissing)y) numberOfMissing++; nextNonMissing = post->matrix[i][numberOfMissing]; for(int j=0;jmatrix[i][j] = nextNonMissing; } numberOfMissing = 0; nextNonMissing = 0; firstNonMissing =0; // 2. if(post->matrix[i][post->y-1]==-1){ // cout <<"hit"<matrix[i][post->y-1-numberOfMissing]==-1 && numberOfMissingy) numberOfMissing++; //cout <<"\t ->2. number of missing:"<matrix[i][post->y-1-numberOfMissing]; //cout <<"\t ->3. nextNonMissing:"<matrix[i][post->y-1-j] = nextNonMissing; } numberOfMissing = 0; nextNonMissing = 0; firstNonMissing =0; // 3. Anders much for(int j=1;jy-1;j++){ if (post->matrix[i][j]==-1){ firstNonMissing = post->matrix[i][j-1]; nextNonMissing= post->matrix[i][j+1]; dif2 = difPos->array[j+1]; int o=1; while(nextNonMissing==-1){ o++; dif2 =+ difPos->array[j+o]; nextNonMissing = post->matrix[i][j+o]; if(o>100000) cout<<"You are so screwed!"<array[j-1]; difSum = dif1+dif2; float tmp1 = (dif1/difSum)*firstNonMissing; float tmp2 = (dif2/difSum)*nextNonMissing; // cout<<"tmp1="<matrix[i][j] =tmp1 + tmp2; if(post->matrix[i][j]<0&&post->matrix[i][j+numberOfMissing]!=-1){ cout<<"fucked"<postFileName),pars,";: \t"); } fArray *getKthorfinn(const char *pName, int isK0){ fArray *kFromfile = readarray(string(pName),";: \t",1); //now post process the k's if(isK0!=0) for (int i=0 ; ix ;i++) kFromfile->array[i] =1 -kFromfile->array[i]; return kFromfile; } /// checks if cmpVar exists in ary. int exists(int cmpVar, iArray *ary){ for(int i=0;ix;i++) if(cmpVar==ary->array[i]) return 1; return 0; } void getCaseList(char *pName, int ccAll){ std::vector tokens; const int SIZE=MAX_ELEMS_PER_LINE; char buffer[SIZE]; std::ifstream pFile (pName,std::ios::in); if(!pFile){ std::cout <<"Problems opening file" <x ; i++){ //changed per anders request int tmp = to_int( tokens.at(i)); if(tmp) dFileList->array[i] = 0; else dFileList->array[i] = 1; } int len=0;//this is the number of affected individuals int len2=0;//this is the number of non-affected individuals. for(int i=0;ix;i++) if(dFileList->array[i]==1) len++; else if(dFileList->array[i]!=-1)//these should be discarded len2++; int len3 = dFileList->x - len - len2; printf("(%d,%d,%d)\n",len,len2,len3); n1 = len; n0 = len2; //now create an array of length=len that contains id of affected individuals iArray *idList = allocIntArray(len); //list of ids of affected individuals iArray *discardList = allocIntArray(len3); //list of ids that shouldn't be used in analysis. len=0; len3=0; for(int i=0;ix;i++){ if(dFileList->array[i]==1){ idList->array[len]=i; len++; } if(dFileList->array[i]==-1){ //cout << "hit:"<array[len3]=i; len3++; } } // print_array(discardList); //this makes the famous list matrix //length is length of dfile multiplied by length minus one halfs indMatrix = allocIntMatrix((dFileList->x*(dFileList->x-1))/2,2); int xvar=0; int yvar=0; for(int i=0;(ix-1 &&xvarx);i++) for(int j=i+1;jx;j++){ indMatrix->matrix[xvar][yvar] = i; yvar++; indMatrix->matrix[xvar][yvar] = j; xvar++; yvar=0; } /* //print indMatrix for(int i=0;ix;i++) printf("%d\t%d\n",indMatrix->matrix[i][0],indMatrix->matrix[i][1]); */ //now checks which rows that contains dlb affected individuals indexList = allocIntArray(indMatrix->x); len=0; len2=0; len3 =0 ; for(int i=0;ix;i++) if((exists(indMatrix->matrix[i][0],idList)) && (exists(indMatrix->matrix[i][1],idList))){ indexList->array[i] = 1; len++; } else if((exists(indMatrix->matrix[i][0],discardList)) || (exists(indMatrix->matrix[i][1],idList))){ indexList->array[i] = -1; //len2++; } else if(!(exists(indMatrix->matrix[i][0],idList)) && !(exists(indMatrix->matrix[i][1],idList))){ indexList->array[i] = 2; len2++; } else{ len3++; } //cout <<"\nlen:"<x;i++) if(indexList->array[i]==0){ indexList->array[i] = 2; len2++; } } else if (ccAll==1)//this is missing in affectList for(int i=0;ix;i++) if(indexList->array[i]==0){ indexList->array[i]=1; len++; } //print_array(boolList->array,boolList->x); affectList = allocIntArray(len); nonAffectList = allocIntArray(len2); len=0;//affectlist len2=0;//nonaffectlist generate_affectLists(indexList); if(0){ affectList->print("affectedlist",NULL); nonAffectList->print("non affect list",NULL); } printf("\n\t-> n=%d,\tn0=%d,\tn1=%d\t,numCases=%d,\tnumControls:%d\n",dFileList->x,n0,n1,affectList->x,nonAffectList->x); killArray(idList); killArray(dFileList); killArray(discardList); } float get_mean(fArray *ary,iArray *extractList){ float tmpSum=0; float result; for (int i=0;ix;i++){ // printf("ary->array[extractList->array[i]]:%f\n",ary->array[extractList->array[i]]); tmpSum += ary->array[extractList->array[i]]; } result = tmpSum/extractList->x; return result; } float covar_no_missing_k1(fMatrix *post,fArray *K,int tal1,int tal2){ float tmpSum=0; float result=0; for (int i=0;iy;i++){ tmpSum += (post->matrix[tal1][i] -K->array[tal1])*(post->matrix[tal2][i] -K->array[tal2]); } result = tmpSum/(post->y-1.0); //printf("res is:%f\n",result); return result; } float covar_with_missing_k1(fMatrix *post,fArray *K,int tal1,int tal2){ int m=0; float tmpSum=0; float result=0; for (int i=0;iy;i++){ if(post->matrix[tal1][i]!=-1&&post->matrix[tal2][i]!=-1){ tmpSum += (post->matrix[tal1][i]-K->array[tal1])*(post->matrix[tal2][i] - K->array[tal2]); m++; } } result = tmpSum/(float(m)-1.0); //cout << "m="< Calculating covariance matrix...\n\t presuming K list is K1, and no missing exists"<x,post->x); //exit(0); //float tmpSum=0; for (int i=0;ix;i++){//N //ask anders, for (int j=i;jx;j++){//m // printf("\t->i=%d\tj=%d\t",i,j);U<-apply(post[cases,],2,mean,na.rm=T) retMat->matrix[i][j] = covar_no_missing_k1(post,K,i,j); retMat->matrix[j][i] = retMat->matrix[i][j]; } } } else if(missingExists==1){ cout <<"\n\t-> Calculating covariance matrix...\n\t presuming K list is K1, and missing exists"<x,post->x); //float tmpSum=0; for (int i=0;ix;i++){//N for (int j=i;jx;j++){//m // printf("\t->i=%d\tj=%d\t",i,j);U<-apply(post[cases,],2,mean,na.rm=T) retMat->matrix[i][j] = covar_with_missing_k1(post,K,i,j); retMat->matrix[j][i] = retMat->matrix[i][j]; } } } else cout<<"error "<y); float tmpSum=0; for (int i=0;iy;i++){ tmpSum=0; for(int j=0;jx;j++) tmpSum += post->matrix[extractList->array[j]][i]; retAry->array[i] = tmpSum/(float)extractList->x; } return retAry; } fArray *column_wise_mean_with_missing(fMatrix *post,iArray *extractList){ fArray *retAry = allocFloatArray(post->y); float tmpSum=0; for (int i=0;iy;i++){ tmpSum=0; int iNotMissing=0; for(int j=0;jx;j++) if(post->matrix[extractList->array[j]][i] != -1){ iNotMissing++; tmpSum += post->matrix[extractList->array[j]][i]; } retAry->array[i] = tmpSum/(float)iNotMissing; } return retAry; } /* calculates rowwise mean */ fArray *row_wise_mean_no_missing(fMatrix *post,iArray *extractlist){ // std::cout << "\nrow_wise_mean_\n"; fArray *retAry = allocFloatArray(post->x); float tmpSum=0; for (int i=0;ix;i++){ tmpSum=0; for(int j=0;jy;j++) tmpSum += post->matrix[i][j]; retAry->array[i] = tmpSum/(float)post->y; //std::cout<array[i]<x); float tmpSum=0; for (int i=0;ix;i++){ tmpSum=0; int iNotMissing=0; for(int j=0;jy;j++) if(post->matrix[i][j]!=-1){ tmpSum += post->matrix[i][j]; iNotMissing++; } retAry->array[i] = tmpSum/(float)iNotMissing; } return retAry; } float sum_of_covar_matrix(fMatrix *mat,iArray *okList){ float returnValue=0; for (int i=0;ix;i++) for (int j=0;jx;j++) returnValue += mat->matrix[okList->array[i]][okList->array[j]]; return returnValue; } void make_datastructures(functionPars2 *pars){ cout<<"\t-> Getting post file... "; post = getPost1(pars); /* for(int i=0;ix;i++){ for(int j=0;jy;j++) printf("%f ",post->matrix[i][j]); puts(""); } */ //positions might exist if(pars->posFileName != NULL){ //cout<<"getting position file"<posFileName,pars->every); // for(int i=0;ix;i++) printf("%f ",position->array[i]); if(post->y!=position->x){ printf("\t-> Number of SNP's in posteroir file is different from positionfile %d!=%d ...will exit",post->y,position->x); exit(0); } } //check if we should fix missing if(pars->infer==1&&pars->posFileName!=NULL){ fix_missing(); pars->missingExists =0;//after infering the missing values, no more missing values exists; if(pars->dumpPostFileName!=NULL){ printf("\t-> Will dump infered posterier matrix in file: %s",pars->dumpPostFileName); write_matrix(pars->dumpPostFileName,post); cout<<" done "<infer==1&&pars->posFileName==NULL) { printf("\n\t-> Position file required for removing missing \n"); exit(0); } // change K0 to K1 (+K2) for(int y=0;yy;y++) for(int x=0;xx;x++) post->matrix[x][y] = 1 - post->matrix[x][y]; //get list of affected and un affected individuals; if (pars->dFileName != NULL){ getCaseList(pars->dFileName,pars->ccAll); } else { //generate affect list affectList = allocIntArray(post->x); for(int i=0;ix;i++) affectList->array[i]=i; // printf("\t-> n=%d,\tn0=%d,\tn1=%d\t,numCases=%d,\tnumControls:%d\n",dFileList->x,n0,n1,affectList->x,nonAffectList->x); printf("n=%d (presumes all individuals are affected)\n",post->x); //screen output //generated dFile list; indexList = allocIntArray((unsigned long int)(post->x*((unsigned long int)post->x-1))/2); // exit(0); for(int i=0;i<5;i++) indexList->array[i]=1; } //printf("af.len:%d\n",affectList->x); //if we were given a kfilename then read it if(!pars->kFileName.empty()){ K = getKthorfinn(pars->kFileName.c_str(),pars->isK0); } else{ printf("\t-> Calculating the expected allele sharing... "); if(pars->missingExists==0) K = row_wise_mean_no_missing(post,affectList);//second arg. doesn't matter shit else K = row_wise_mean_with_missing(post,affectList);//second arg doesn't matter shit printf(" done \n"); } } double d_uniform_01 ( int *seed ){ int k; double r; k = *seed / 127773; *seed = 16807 * ( *seed - k * 127773 ) - k * 2836; if ( *seed < 0 ) *seed = *seed + 2147483647; r = ( double ) ( *seed ) * 4.656612875E-10; return r; } int i_uniform ( int b, int c, int *seed ){ int i; i = b + ( int ) ( d_uniform_01 ( seed ) * ( double ) ( c + 1 - b ) ); if ( i < b ) i = b; if ( c < i ) i = c; return i; } void i_swap ( int *i, int *j ){ int k; k = *i; *i = *j; *j = k; return; } int *perm_uniform ( int n, int *seed ){ int i; int j; int *p; p = new int[n]; for ( i = 1; i <= n; i++ ) { p[i-1] = i; } for ( i = 1; i <= n; i++ ) { j = i_uniform ( i, n, seed ); i_swap ( &p[i-1], &p[j-1] ); } return p; } void permutate_dFileList(){ iArray *permlist = allocIntArray(n1); int *perm; int new_seed = rand(); int len=0; int len2=0; perm = perm_uniform (n0+n1, &new_seed); for(int i=0;iarray[i]=perm[i]-1; for(int i=0;ix;i++){ if((exists(indMatrix->matrix[i][0],permlist)) && (exists(indMatrix->matrix[i][1],permlist))){ indexList->array[i] = 1; len++; } else if(!(exists(indMatrix->matrix[i][0],permlist)) && !(exists(indMatrix->matrix[i][1],permlist))){ indexList->array[i] = 2; len2++; } else indexList->array[i] = 0; } // print_array(indexList->array,indexList->x); // print_array(permlist->array,permlist->x); // killArray(indexList); generate_affectLists(indexList); // print_array(nonAffectList->array,nonAffectList->x); // print_array(affectList->array,affectList->x); killArray(permlist); delete[] perm; } fArray *fArray_plus_float(fArray *ary,float a){ fArray *retVal = allocFloatArray(ary->x); for(int i=0;ix;i++) retVal->array[i]=ary->array[i]+a; return retVal; } fArray *fArray_minus_fArray(fArray *ary1,fArray *ary2){ fArray *retVal = allocFloatArray(ary1->x); for(int i=0;ix;i++) retVal->array[i]=ary1->array[i]-ary2->array[i]; return retVal; } void fArray_div(fArray *ary,float a){ for(int i=0;ix;i++){ //printf("\t\t dividing:%f\t with %f\n",ary->array[i],a); ary->array[i]=ary->array[i]/a; } } float getMax_elem(fArray *ary){ float result = ary->array[0]; for (int i=1;ix;i++){ if (ary->array[i]>result) result = ary->array[i]; } return result; } //memory stabil sort, //O(n^2) complxity void sort(fArray *ary){ float swap; float lowest_value; for (int j=0;jx;j++){ lowest_value = ary->array[j]; for(int i=j;ix;i++){ if (ary->array[i]array[j]){ swap = ary->array[i]; ary->array[i] = ary->array[j]; ary->array[j] = swap; } } } } void uber_main(functionPars2 *pars) { //printf("pars missingExists:%d\n",pars->missingExists); //initialize datastructures make_datastructures(pars); //printf("pars missingExists:%d\n",pars->missingExists); //calculate covariance matrix; string covarFileName =string( pars->cFileName); if(pars->kFileName.empty()) covarFileName =covarFileName + ".E1.txt"; else covarFileName =covarFileName + ".E2.txt"; if (fileExists(covarFileName.c_str())==0){ printf("\t-> Calculating the covariance matrix - may take several minutes ... "); fflush(stdout); covarMatrix = calculateCoVar(pars->missingExists); if(pars->cFileName.compare("")){ printf("\t->Will dump calculated covariance matrix in file: %s",covarFileName.c_str()); write_matrix(covarFileName.c_str(),covarMatrix); cout<<" done"< Will read covariance matrix from file - may take several minutes ... \n"); covarMatrix = getCoVar(covarFileName.c_str()); } if(covarMatrix->x!=covarMatrix->y||covarMatrix->x!=post->x){ printf("\t-> Dimension of covariate matrix doesn't equal dimension of posterior matrix\n"); exit(0); } //get_column_wise mean fArray *U; if(pars->missingExists) U = column_wise_mean_with_missing(post,affectList); else U = column_wise_mean_no_missing(post,affectList); //cout <<"________________this is U_______________"<x;i++) // printf("K->array[%d]=%f\n",i,K->array[i]); float mn = get_mean(K,affectList); //cout <<"mean :" << mn <x)*(affectList->x)); // cout << "sumCovar:" <design==1){ write_array(pars->outFileName.c_str(),Z); } else if(pars->dFileName!=NULL) { printf("\t->Will start case control analays\n"); //do perm test fArray *U1 = NULL; fArray *U0 = NULL; iArray *psim = allocIntArray(post->y); //iterative resulthulder fArray *rowMax = allocFloatArray(pars->nSim);//iteratie resultholder //for(int i=0;ix;i++) printf("%f ",K->array[i]); float mn1 = get_mean(K,affectList); float mn0 = get_mean(K,nonAffectList); //cout<missingExists){ U1 = column_wise_mean_with_missing(post,affectList); U0 = column_wise_mean_with_missing(post,nonAffectList); }else{ U1 = column_wise_mean_no_missing(post,affectList); U0 = column_wise_mean_no_missing(post,nonAffectList); } fArray *Z1 = fArray_plus_float(U1,-mn1); fArray *Z0 = fArray_plus_float(U0,-mn0); if(0){ U1->print("\t-> This is U1 inX0",NULL); U0->print("\t-> This is U0 inX0",NULL); } float varU1 = sum_of_covar_matrix(covarMatrix,affectList)/((affectList->x)*(affectList->x));//number of non -missing affected anders float varU0 = sum_of_covar_matrix(covarMatrix,nonAffectList)/((nonAffectList->x)*(nonAffectList->x));//number if non missin affected anders float bottom= sqrt((((nonAffectList->x-1)*varU0+(affectList->x-1)*varU1)/(nonAffectList->x+affectList->x-2)) * (1.0/float(nonAffectList->x)+1.0/float(affectList->x))); fArray *X0 = fArray_minus_fArray(Z1,Z0); if(0){ X0->print("\n-> This is U1 - U0 inX0",NULL); } fArray_div(X0,bottom); write_array(pars->outFileName.c_str(),X0); //anders if(0){ X0->print("this is bottom value\n\t-> This is X0",NULL); } killArray(U1); killArray(U0); killArray(Z1); killArray(Z0); //each ieration printf("\t->Will start permutations\n"); srand ( time(NULL) ); rowMax->array[0] = getMax_elem(X0); for (int i=1;inSim;i++){ if((i%10)==0){ printf("\r\t\t Has Run: %d permutations",i); fflush(stdout); } permutate_dFileList(); mn1 = get_mean(K,affectList); mn0 = get_mean(K,nonAffectList); // cout<missingExists){ U1 = column_wise_mean_with_missing(post,affectList); U0 = column_wise_mean_with_missing(post,nonAffectList); }else{ U1 = column_wise_mean_no_missing(post,affectList); U0 = column_wise_mean_no_missing(post,nonAffectList); } fArray *Z1 = fArray_plus_float(U1,-mn1); fArray *Z0 = fArray_plus_float(U0,-mn0); varU1 = sum_of_covar_matrix(covarMatrix,affectList)/((affectList->x)*(affectList->x));//affected only varU0 = sum_of_covar_matrix(covarMatrix,nonAffectList)/((nonAffectList->x)*(nonAffectList->x)); //affected only bottom= sqrt((((nonAffectList->x-1)*varU0+(affectList->x-1)*varU1)/(nonAffectList->x+affectList->x-2)) * (1.0/float(affectList->x)+1.0/float(nonAffectList->x))); fArray *Xi = fArray_minus_fArray(Z1,Z0); fArray_div(Xi,bottom); if(0){ Xi->print("\n\t-> Xi",NULL); } //do psum check for(int j=0;j < psim->x;j++){ if(Xi->array[j] >= X0->array[j]) psim->array[j] = psim->array[j]+1; } //get max_elem rowMax->array[i] = getMax_elem(Xi); killArray(U1); killArray(U0); killArray(Z1); killArray(Z0); killArray(Xi); } printf("\n"); // after all perms if(pars->xFileName.compare("")){ printf("\t->Will dump Xmax array in file: %s\n",pars->xFileName.c_str()); write_array(pars->xFileName.c_str(),rowMax); } float pmaxsim =1; for (int i=1;ix;i++) if(rowMax->array[0] <= rowMax->array[i]) pmaxsim++; pmaxsim = pmaxsim/pars->nSim; printf("\t->pmaxsim:%f\n",pmaxsim); float psimVar=0; for (int i=0;ix;i++) psimVar += psim->array[i]; sort(rowMax); int indexToGet = round((1-pars->sig)*rowMax->x)-1; printf("\t->psim/nsim:%f",psimVar/(float)pars->nSim); // printf("\t->indexToGet from rowMax array :%d\tpsigsim:%f\n",indexToGet,rowMax->array[indexToGet]); 0.73 printf("\t\tpsigsim:%f\n",rowMax->array[indexToGet]); //COKE ROX killArray(psim); killArray(rowMax); killArray(X0); } else printf("\t->dFile required for case controll design\n"); //print_matrix(indMatrix); killMatrix(post); killMatrix(covarMatrix); killArray(K); killArray(affectList); killArray(nonAffectList); killArray(U); // killArray(dFile); killMatrix(indMatrix); killArray(indexList); killArray(Z); if(pars->posFileName!=NULL) killArray(position); } void print_functionPars2(functionPars2 *pars){ printf("_________________________\n"); printf("required:\t\t\t\tDefaultValue\n"); printf("\t-P\t:Posterier Matrix\t%s\n",pars->postFileName); printf("\t-p\t:position file\t\t%s\n",pars->posFileName); printf("options:\n"); // printf("\t-K\t%s\t\t :(or -K0) \n",pars->kFileName.c_str());//chg 6/1 2009 printf("\t-D\t:Individuals to test\t%s\n",pars->dFileName); // printf("\t-d\t%d\t\t\t :1: affecteed only, 2: case control\n",pars->design); //chg 6/1 2009 printf("\t-Nsim\t:number of permutions\t%d\n",pars->nSim); printf("\t-ccAll\t:ccAll=0 strict,\t%d\n\t\t:ccAll=2 NA in controls,\n\t\t:ccAll=1 NA in cases \n",pars->ccAll); printf("\t-sig\t:significans\t\t%f\n",pars->sig); printf("\t-infer\t:infer=1 infer missing\t%d\n",pars->infer); printf("\t-every\t:=2 use every 2.snp\t%d\n",pars->every); // printf("\t-NA\t%d\t\t\t :NA=1 Missing exists in posterier\n",pars->missingExists); printf("outfiles:\n"); printf("\t-c\t:dump covariance matrix\t%s\n",pars->cFileName.c_str()); printf("\t-x\t:dump Xmax array\t%s\n",pars->xFileName.c_str()); printf("\t-o\t:dump posterior file\t%s\n",pars->dumpPostFileName); printf("\t-f:output file\t\t%s\n",pars->outFileName.c_str()); printf("_________________________\n"); } int main(int argc,char *argv[]){ //init default pars functionPars2 *pars = new functionPars2(); pars->postFileName = NULL;//"sam_trans_post_all_hap.txt"; // pars->kFileName = ""; //"k_all_hap.txt"; pars->cFileName = "";//"covar.txt"; //"k_all_hap.txt"; pars->xFileName = "x.txt"; //"k_all_hap.txt"; pars->dFileName = NULL; pars->posFileName = NULL; pars->dumpPostFileName = NULL; pars->outFileName = "stat.txt"; pars->design = 2;//chg 6/1 2009 pars->nSim = 10; pars->ccAll = 0; pars->sig = 0.05; pars->infer = 1; //chg 6/1 2009 pars->missingExists =1; pars->every = 1; pars->k2 = 0; //now get commandline args; if (argc<3){ printf("-------------------------------\nHMMtest 0.74 \nMust supply atleast 2 args.\n"); print_functionPars2(pars); delete pars; return 0; } string arg; for (int i=1;ipostFileName=argv[i+1]; else if(arg.compare("-K0")==0){ pars->kFileName=argv[i+1]; pars->isK0 = 1; } else if(arg.compare("-K")==0){ pars->kFileName=argv[i+1]; pars->isK0 = 0; } else if(arg.compare("-c")==0) pars->cFileName=argv[i+1]; else if(arg.compare("-x")==0) pars->xFileName=argv[i+1]; else if(arg.compare("-o")==0) pars->dumpPostFileName=argv[i+1]; else if(arg.compare("-p")==0) pars->posFileName=argv[i+1]; else if(arg.compare("-D")==0) pars->dFileName=argv[i+1]; else if(arg.compare("-f")==0) pars->outFileName=argv[i+1]; else if(arg.compare("-d")==0) pars->design=atoi(argv[i+1]); else if(arg.compare("-Nsim")==0) pars->nSim=atoi(argv[i+1]); else if(arg.compare("-k2")==0) pars->k2=atoi(argv[i+1]); else if(arg.compare("-every")==0)//added 30 march 2009 0.988 pars->every=atoi(argv[i+1]); /*//chg 6/1 2009 else if(arg.compare("-NA")==0){ pars->missingExists=atoi(argv[i+1]); printf("missingExists args is:%d\n",pars->missingExists); } */ else if(arg.compare("-sig")==0) pars->sig=atoi(argv[i+1]); else if(arg.compare("-infer")==0) pars->infer=atoi(argv[i+1]); else if(arg.compare("-ccAll")==0) pars->ccAll=atoi(argv[i+1]); else{ printf("\t\tUnknow argument \t%s\n\t\tWill exit now\n",argv[i]); exit(0); } } print_functionPars2(pars); uber_main(pars); delete pars; return 0; }