/* Nucleotide Sequence Generator - seq-gen, version 1.2.5 */ /* (c) Copyright 1996-2001, Andrew Rambaut & Nick Grassly */ /* Department of Zoology, University of Oxford */ #include #include #include #include #include "global.h" #include "tree.h" #include "evolve.h" #include "models.h" #include "gamma.h" #include "random.h" #include "covarion.h" int numTaxa, numBases, maxPartitions, numPartitions, fileFormat; double gammaShape, proportionInvariable; int numCats, rateHetero, invariableSites; double catRate[MAX_RATE_CATS]; double freqRate[MAX_RATE_CATS]; char *nucleotides="ACGT"; static double matrix[MAX_RATE_CATS][16]; static double vector[4]; static double *gammaRates; static short *categories; static short *invariable; static double *siteRates; /* prototypes */ char SetBase(double *P); short IsInvariable(); void SetSequence(char *seq, char *source, int inFromSite, int inNumSites); void RandomSequence(char *seq, int inNumSites); void MutateSequence(char *seq, int inFromSite, int inNumSites, double len); void MutateSequenceCov(char *seq, int inFromSite, int inNumSites, double *length, double scale); void SetSiteRates(int inFromSite, int inNumSites, double inScale); void EvolveNode(TNode *anc, TNode *des, int inFromSite, int inNumSites, double scale); void WritePhylipFormat(FILE *fv, TTree **treeSet, int *partitionLengths); void WriteNexusFormat(FILE *fv, int treeNo, int datasetNo, TTree **treeSet, int *partitionLengths); void WriteAncestralSequencesNode(FILE *fv, TTree *tree, int *nodeNo, TNode *des); /* functions */ void CreateRates() { if (rateHetero==GammaRates) gammaRates=CAllocMem(numBases*sizeof(double), "gammaRates", "CreateSequences", 0); else if (rateHetero==DiscreteGammaRates) categories=CAllocMem(numBases*sizeof(short), "categories", "CreateSequences", 0); if (invariableSites) invariable=CAllocMem(numBases*sizeof(short), "invariable", "CreateSequences", 0); siteRates=CAllocMem(numBases*sizeof(double), "siteRates", "CreateSequences", 0); } void CreateSequences(TTree *tree, int inNumSites) { TNode *P; P=tree->nodeList; while (P!=NULL) { if (P->sequence != NULL) free(P->sequence); P->sequence=CAllocMem(inNumSites+1, "sequences", "CreateSequences", 0); P=P->next; } } void SetCategories() { int i; double sumRates; if (rateHetero==CodonRates) { sumRates=catRate[0]+catRate[1]+catRate[2]; if (sumRates!=3.0) { catRate[0]*=3.0/sumRates; catRate[1]*=3.0/sumRates; catRate[2]*=3.0/sumRates; } } else if (rateHetero==GammaRates) { for (i=0; i(*P) && j<'\3'; j++) P++; return j; } short IsInvariable() { double r; r=rndu(); if (r < proportionInvariable) return 1; else return 0; } void RandomSequence(char *seq, int inNumSites) { int i; char *P; P=seq; for (i=0; isequence, anc->sequence, inNumSites); if (!covarion) { len=des->length0*scale; MutateSequence(des->sequence, inFromSite, inNumSites, len); } else { MutateSequenceCov(des->sequence, inFromSite, inNumSites, des->effectiveLength0, scale); } if (des->tipNo==-1) { EvolveNode(des, des->branch1, inFromSite, inNumSites, scale); EvolveNode(des, des->branch2, inFromSite, inNumSites, scale); } } void EvolveSequences(TTree *tree, int inFromSite, int inNumSites, double scale, char *ancestor) { if (ancestor==NULL) RandomSequence(tree->root->sequence, inNumSites); else SetSequence(tree->root->sequence, ancestor, inFromSite, inNumSites); /* Rescale the branch lengths to accommodate the invariable sites */ if (invariableSites) scale /= (1.0 - proportionInvariable); SetSiteRates(inFromSite, inNumSites, scale); EvolveNode(tree->root, tree->root->branch1, inFromSite, inNumSites, scale); EvolveNode(tree->root, tree->root->branch2, inFromSite, inNumSites, scale); if (!tree->rooted) EvolveNode(tree->root, tree->root->branch0, inFromSite, inNumSites, scale); } void WriteSequences(FILE *fv, int treeNo, int datasetNo, TTree **treeSet, int *partitionLengths) { switch (fileFormat) { case PHYLIPFormat: case RelaxedFormat: WritePhylipFormat(fv, treeSet, partitionLengths); break; case NEXUSFormat: WriteNexusFormat(fv, treeNo, datasetNo, treeSet, partitionLengths); break; } } void WritePhylipFormat(FILE *fv, TTree **treeSet, int *partitionLengths) { int i, j, k; char *P; fprintf(fv, " %d %d\n", numTaxa, numBases); for (i=0; inames[i]); else { j=0; P=treeSet[0]->names[i]; while (j<10 && *P) { fputc(*P, fv); j++; P++; } while (j<10) { fputc(' ', fv); j++; } } for (k = 0; k < numPartitions; k++) { P=treeSet[k]->tips[i]->sequence; for (j=0; j 0 && datasetNo > 0) fprintf(fv, "Begin DATA;\t[Tree %d, Dataset %d]\n", treeNo, datasetNo); else if (treeNo > 0) fprintf(fv, "Begin DATA;\t[Tree %d]\n", treeNo); else if (datasetNo > 0) fprintf(fv, "Begin DATA;\t[Dataset %d]\n", datasetNo); else fprintf(fv, "Begin DATA;\n"); fprintf(fv, "\tDimensions NTAX=%d NCHAR=%d;\n", numTaxa, numBases); fprintf(fv, "\tFormat MISSING=? GAP=- DATATYPE=DNA;\n"); fprintf(fv, "\tMatrix\n"); maxLen = strlen(treeSet[0]->names[0]); for (i=1; inames[i]); if (len > maxLen) maxLen = len; } for (i=0; inames[i]); len = maxLen - strlen(treeSet[0]->names[i]); for (j = 0; j < len; j++) fputc(' ', fv); for (k = 0; k < numPartitions; k++) { P=treeSet[k]->tips[i]->sequence; for (j=0; jrooted) n = (2 * numTaxa) - 3; else n = (2 * numTaxa) - 2; fprintf(fv, " %d %d\n", n, numBases); n = numTaxa + 1; fprintf(fv, "%d\t", n); P=tree->root->sequence; for (j=0; jrooted) WriteAncestralSequencesNode(fv, tree, &n, tree->root->branch0); WriteAncestralSequencesNode(fv, tree, &n, tree->root->branch1); WriteAncestralSequencesNode(fv, tree, &n, tree->root->branch2); } void WriteAncestralSequencesNode(FILE *fv, TTree *tree, int *nodeNo, TNode *des) { int j; char *P = des->sequence; if (des->tipNo==-1) { (*nodeNo)++; fprintf(fv, "%d\t", *nodeNo); for (j=0; jbranch1); WriteAncestralSequencesNode(fv, tree, nodeNo, des->branch2); } else { fprintf(fv, "%s\t", tree->names[des->tipNo]); for (j=0; j