Skip to content

Commit 0a41e24

Browse files
committed
Improved output information and fixed bug
Added "?" to the output file to indicate when we were unable to test a Clan in a tree. Up until now, this would have been reported as "1", since the tree did not violate the Clan; however, this change may be more useful to users as (for instance) it will allow them to identify trees that were not tested by any Clan. Also fixed a bug where, for a very small clade, a Clan could be reported as passed when in fact the tree violates it.
1 parent 199b5c7 commit 0a41e24

File tree

4 files changed

+123
-104
lines changed

4 files changed

+123
-104
lines changed

.DS_Store

6 KB
Binary file not shown.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ testtrees_for_clade_check.ph
2222
testtrees_for_clade_check.ph.scores.txt
2323

2424
clan_check
25+
treelist_to_keep.txt
26+
paup4a151_osx

clan_check.c

Lines changed: 119 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -236,15 +236,15 @@ int main (int argc, char *argv[])
236236
}
237237

238238
}
239-
/* printf("number of taxa in tree are %d\n", numtaxa); */
239+
/*printf("number of taxa in tree are %d\n", numtaxa); */
240240
/* for(i=0; i<numtaxa; i++) printf("taxa %d = >%s<\n", i, taxa[i]);*/
241241

242242
/* go through all the clades from the clades file */
243243
string[0]='\0';
244244
fprintf(outfile, "\t%d", numtaxa);
245245
for(n=0; n<numclades; n++)
246246
{
247-
/*printf("n=%d\n", n);*/
247+
/*printf("Checking Clade %d\n", n); */
248248
/* identify the taxa from this clade that are actually in the tree */
249249
strcpy(string, clades[n]);
250250
numintree=0;
@@ -266,114 +266,129 @@ int main (int argc, char *argv[])
266266
token = strtok(NULL, " ");
267267
} /* clanlist now contains a list of all the taxa that are in both the clade and the tree */
268268
foundclade=TRUE;
269-
if(numintree > 1 && (numtaxa - numintree) > 1) /* there is no point in looking for clades of taxa if only onw (or none) of them are in the tree */
269+
if(numintree > 1) /* there is no point in looking for clades of taxa if only onw (or none) of them are in the tree */
270270
{
271-
foundclade=FALSE;
272-
/* Extract all the clans from the tree */
273-
i=0;
274-
while(tree[i] != ';' && !foundclade)
275-
{
276-
if( tree[i] == '(' && i !=0)
271+
if((numtaxa - numintree) > 1) /* If the tree is ONLY made up of taxa from the clan, then by default the clan is there and we don;t need tot test any further */
272+
{
273+
foundclade=FALSE;
274+
/* Extract all the clans from the tree */
275+
i=0;
276+
/*printf("tree = %s\n", tree); */
277+
while(tree[i] != ';' && !foundclade)
277278
{
278-
/* identify all the taxa IDs in this split */
279-
g=i; j=0;
280-
constraint[j] = '('; j++; constraint[j] = ' '; j++;
281-
l=1; g++; numinconstraint=0;
282-
while((l != 0 || tree[g-1] != ')') && tree[g] != ';' && !foundclade) /* CHANGED RECENTLY FROM while(l != 0 && tree[k-1] != ')' && tree[k] != ';' ) */
283-
{
284-
switch(tree[g])
285-
{
286-
case '(':
287-
l++;
288-
g++;
289-
break;
290-
case ')':
291-
l--;
292-
g++;
293-
break;
294-
case ':':
295-
while(tree[g] != '(' && tree[g] != ')' && tree[g] != ',' && tree[g] != ';' ) g++;
296-
break;
297-
case ',':
298-
case ' ':
299-
g++;
300-
break;
301-
default:
302-
while(tree[g] != ',' && tree[g] != '(' && tree[g] != ')' && tree[g] != ':' )
303-
{
304-
constraint[j] = tree[g];
305-
g++; j++;
306-
}
307-
numinconstraint++;
308-
constraint[j] = ' '; j++;
309-
constraint[j] = ','; j++;
310-
constraint[j] = ' '; j++;
311-
break;
312-
}
313-
}
314-
constraint[j-1] = ' '; /* overwrites the extra comma at the end */
315-
316-
constraint[j] = ')'; j++;
317-
constraint[j] = '\0';
318-
/* printf("%s\n", constraint);*/
319-
320-
/* compare this constraint to the currently defined clade from the file [ in the array cladelist ] */
321-
foundclade=FALSE;
322-
g=0;
323-
/* if the number of taxa in or outside this constraint is less or more then the number of taxa in the clade list, then there is no point checking */
324-
if(numinconstraint == numintree)
325-
{
326-
/*printf("checking within\n"); */
327-
/* check to see if the taxa are the same inside the constraint as in the clade */
328-
foundtaxa=TRUE;
329-
330-
for(l=0; l<numintree; l++) /*for all taxa in the taxalist */
331-
{
332-
string[0]=' ';
333-
string[1]='\0';
334-
strcat(string, clanlist[l]);
335-
strcat(string, " ");
336-
/* printf("looking for \"%s\" in %s\n", string, constraint);*/
337-
338-
if(strstr(constraint,string) == '\0')
339-
{
340-
/*printf("didn't find it\n");*/
341-
foundtaxa=FALSE;
342-
}
343-
else
344-
{
345-
/*printf("found it\n"); */
346-
g++; /* this will equal the number of taxa from the clades that are in the constaint */
347-
}
348-
}
349-
if(foundtaxa == TRUE) foundclade = TRUE;
350-
}
351-
if(!foundclade)
352-
{
353-
/* printf("num in constraint = %d\tnum taxa = %d\tnumintree = %d\tg = %d\n", numinconstraint, numtaxa, numintree, g ); */
354-
}
355-
if((numtaxa-numinconstraint) == numintree && !foundclade && g == 0)
356-
{
357-
/* Check to see if the taxa outside the constraint are the same as in the clade */
358-
/*printf("checking outside\n");*/
359-
foundclade = TRUE; /* this is because if k=0, then none of the clade taxa were in the constaint, and if the number of remaining taxa in the tree == number of taxa in clade, then they must be all the taxa we are looking for */
360-
}
361-
279+
if( tree[i] == '(' && i !=0)
280+
{
281+
/* identify all the taxa IDs in this split */
282+
g=i; j=0;
283+
constraint[j] = '('; j++; constraint[j] = ' '; j++;
284+
l=1; g++; numinconstraint=0;
285+
while((l != 0 || tree[g-1] != ')') && tree[g] != ';' && !foundclade) /* CHANGED RECENTLY FROM while(l != 0 && tree[k-1] != ')' && tree[k] != ';' ) */
286+
{
287+
switch(tree[g])
288+
{
289+
case '(':
290+
l++;
291+
g++;
292+
break;
293+
case ')':
294+
l--;
295+
g++;
296+
break;
297+
case ':':
298+
while(tree[g] != '(' && tree[g] != ')' && tree[g] != ',' && tree[g] != ';' ) g++;
299+
break;
300+
case ',':
301+
case ' ':
302+
g++;
303+
break;
304+
default:
305+
while(tree[g] != ',' && tree[g] != '(' && tree[g] != ')' && tree[g] != ':' )
306+
{
307+
constraint[j] = tree[g];
308+
g++; j++;
309+
}
310+
numinconstraint++;
311+
constraint[j] = ' '; j++;
312+
constraint[j] = ','; j++;
313+
constraint[j] = ' '; j++;
314+
break;
315+
}
316+
}
317+
constraint[j-1] = ' '; /* overwrites the extra comma at the end */
362318

319+
constraint[j] = ')'; j++;
320+
constraint[j] = '\0';
321+
/*printf("constraint = %s\n", constraint);*/
322+
323+
/* compare this constraint to the currently defined clade from the file [ in the array cladelist ] */
324+
foundclade=FALSE;
325+
g=0;
326+
/* if the number of taxa in or outside this constraint is less or more then the number of taxa in the clade list, then there is no point checking */
327+
if(numinconstraint <= numintree)
328+
{
329+
/*printf("checking within\n"); */
330+
/* check to see if the taxa are the same inside the constraint as in the clade */
331+
foundtaxa=TRUE;
332+
333+
for(l=0; l<numintree; l++) /*for all taxa in the taxalist */
334+
{
335+
string[0]=' ';
336+
string[1]='\0';
337+
strcat(string, clanlist[l]);
338+
strcat(string, " ");
339+
/* printf("looking for \"%s\" in %s\n", string, constraint);*/
340+
341+
if(strstr(constraint,string) == '\0')
342+
{
343+
/*printf("didn't find it\n");*/
344+
foundtaxa=FALSE;
345+
}
346+
else
347+
{
348+
/*printf("found it\n"); */
349+
g++; /* this will equal the number of taxa from the clades that are in the constaint */
350+
}
351+
}
352+
if(foundtaxa == TRUE) foundclade = TRUE;
353+
}
354+
if(!foundclade)
355+
{
356+
/* printf("num in constraint = %d\tnum taxa = %d\tnumintree = %d\tg = %d\n", numinconstraint, numtaxa, numintree, g ); */
357+
}
358+
if((numtaxa-numinconstraint) == numintree && !foundclade && g == 0)
359+
{
360+
/* Check to see if the taxa outside the constraint are the same as in the clade */
361+
/* printf("checking outside\n"); */
362+
foundclade = TRUE; /* this is because if k=0, then none of the clade taxa were in the constaint, and if the number of remaining taxa in the tree == number of taxa in clade, then they must be all the taxa we are looking for */
363+
}
364+
365+
366+
}
367+
i++;
363368
}
364-
i++;
369+
370+
if(foundclade)
371+
{
372+
fprintf(outfile, "\t1");
373+
/*printf("found %s\n", clades[n]);*/
374+
}
375+
else
376+
{
377+
fprintf(outfile, "\t0");
378+
/*printf("did not find %s\n", clades[n]);*/
379+
}
380+
}
381+
else
382+
{
383+
fprintf(outfile, "\t1");
384+
/* The tree is entirely made up of taxa from the lcan being tested, so be default it passes. */
365385
}
366-
}
367-
if(foundclade)
368-
{
369-
fprintf(outfile, "\t1");
370-
/*printf("found %s\n", clades[n]);*/
371-
}
372-
else
373-
{
374-
fprintf(outfile, "\t0");
375-
/*printf("did not find %s\n", clades[n]);*/
376386
}
387+
else /* we did not find enough taxa in to test this Clan */
388+
{
389+
fprintf(outfile, "\t?");
390+
391+
}
377392
g=0;
378393

379394
}

trees.ph

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
(((a,(b,(c,d))),f),e);
22
(((a,(b,(e,d))),c),g);
3+
((a,(b,(e,d))),c);
4+

0 commit comments

Comments
 (0)