#!usr/bin/perl -w # # code that reads a phylip tree file (output from Paup: no spaces, ending ";") # and resolve it by adding branches with lengths 0. if ($#ARGV >= 0) { $treeFile = pop @ARGV; } else {die "No tree file\n";} open FH1, "<$treeFile"; @tree= ; close FH1; $tree = join('',@tree); chomp $tree; chop $tree; #delete the last character: ';' #print "$tree \n\n"; @st = split /,/, $tree; foreach $st (@st){ $position=""; $k0=index($st,")"); while($k0>-1){ $position .= " $k0"; $k0=index($st,")",$k0+1); } @pos=split(" ",$position); $num=$#pos+1; if ($num){ push @number, (-$num); } else{ $num=0; $k0=index($st,"("); while($k0>-1){ $num++; $k0=index($st,"(",$k0+1); } push @number, $num; } } # number counts the parentheses: +1 for ( and -1 for ). # positions are the positions of the )'s. $size = $#st +1; #for ($i=0; $i<$size; $i++){print "$number[$i]\t$st[$i]\n";} #$maxsteps=1; #$step=0; # this is to check if, for each taxa (or line), the node corresponding # to the last "(" has only 2 nodes within. for ($i=0; $i<$size; $i++){ if ($number[$i]>0){ # there is at least 1 node starting here. # we need to check that all the nodes starting here are resolved. for ($nodeindex=$number[$i]-1; $nodeindex>=0; $nodeindex--){ # get position $j where the current node closes. $n=$nodeindex; $j=$i; while ($n>= 0){ $j++; if ($j>=$size){ die "pbm on line 65\n";} $n += $number[$j]; } #now counts the number of nodes within. $k0=$i; if ($nodeindex >0){ $n2=$nodeindex; while ($n2>0){ $k0++; $n2 +=$number[$k0];} } $k=$k0; $n1 =1; #print "i is $i, j is $j\n\tk=k0 is $k0, n1 is $n1\n"; while ($k<$j){ $n1++;$k++; if ($number[$k]>0){ $n2=$number[$k]; while ($n2>0){ $k++; $n2 +=$number[$k];} } #print "\tk is $k, n1 is $n1\n"; } #print "\tn1 is $n1\n"; if ($n1>2){ #print "resolving polytomy at node positions $i, index $nodeindex, closing at $j\n"; $k=$k0+1; substr($st[$k],0,0)="("; # ok, a ( is added. $number[$k]++; $n -= $number[$j]; $index=0; for ($k=0; $k<=$n; $k++){ $index=index($st[$j],")",$index+1); } substr($st[$j],$index,0)="):0.0"; $number[$j]--; #for ($aa=0; $aa<$size; $aa++){print "$number[$aa]\t$st[$aa]\n";} #$step++; if ($step>$maxsteps){ die "maximum steps\n";} } } }} #for ($i=0; $i<$size; $i++){print "$number[$i]\t$st[$i]\n";} for ($i=0; $i<($size-1); $i++){ print STDOUT "$st[$i],"; } print STDOUT "$st[$size-1];\n";