Skip to content

Commit 7475937

Browse files
committed
Rename remove_telomeres to erase_flanks
And move unit tests to one place. Fixes #398
1 parent a64a323 commit 7475937

File tree

8 files changed

+125
-116
lines changed

8 files changed

+125
-116
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
- An environment variable `TSDATE_ENABLE_NUMBA_CACHE` can be set to cache JIT
1313
compiled code, speeding up loading time (useful when testing).
1414

15+
- The time taken for running _tsdate_ is now recorded in the provenance data.
16+
1517
**Documentation**
1618

1719
- Various fixes in documentation, including documenting returned fits.
@@ -27,6 +29,10 @@
2729
of more than one tree, as tests have found that span-weighting the conditional coalescent
2830
causes substantial bias.
2931

32+
- The `remove_telomeres` parameter in the `tsdate.preprocess_ts()` function has been renamed
33+
to `erase_flanks`, to match `tsinfer.preprocess()`. The previous name is kept as a
34+
deprecated alias.
35+
3036
## [0.2.1] - 2024-07-31
3137

3238
**Bugfixes**

docs/usage.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ print(
133133
```
134134
:::{note}
135135
In simulated data you may not have missing data regions, and you may be able to
136-
pass `remove_telomeres=False` to the `preprocess_ts` function.
136+
pass `erase_flanks=False` to the `preprocess_ts` function.
137137
:::
138138

139139
The inference in this case is much more noisy (as illustrated using the original

tests/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def test_default_values_preprocess(self):
146146
assert args.tree_sequence == self.infile
147147
assert args.output == self.output
148148
assert args.minimum_gap == 1000000
149-
assert args.trim_telomeres
149+
assert args.erase_flanks
150150
assert args.split_disjoint
151151

152152

tests/test_functions.py

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,96 +1691,6 @@ def test_constrain_ages_leastsquare(self):
16911691
assert r2_2 > r2_1
16921692

16931693

1694-
class TestPreprocessTs(unittest.TestCase):
1695-
"""
1696-
Test preprocess_ts works as expected
1697-
"""
1698-
1699-
def verify(self, ts, minimum_gap=None, remove_telomeres=None, **kwargs):
1700-
with self.assertLogs("tsdate.util", level="INFO") as logs:
1701-
if minimum_gap is not None and remove_telomeres is not None:
1702-
ts = tsdate.preprocess_ts(
1703-
ts, minimum_gap=minimum_gap, remove_telomeres=remove_telomeres
1704-
)
1705-
elif minimum_gap is not None and remove_telomeres is None:
1706-
ts = tsdate.preprocess_ts(ts, minimum_gap=minimum_gap)
1707-
elif remove_telomeres is not None and minimum_gap is None:
1708-
ts = tsdate.preprocess_ts(ts, remove_telomeres=remove_telomeres)
1709-
else:
1710-
ts = tsdate.preprocess_ts(ts, **kwargs)
1711-
messages = [record.msg for record in logs.records]
1712-
assert "Beginning preprocessing" in messages
1713-
return ts
1714-
1715-
def test_no_sites(self):
1716-
ts = utility_functions.two_tree_ts()
1717-
with pytest.raises(ValueError):
1718-
tsdate.preprocess_ts(ts)
1719-
1720-
def test_invariant_sites(self):
1721-
# Test that invariant sites are not removed by default
1722-
# (and similarly for unused individuals & populations)
1723-
ts = utility_functions.site_no_mutations()
1724-
assert ts.num_sites != 0
1725-
assert ts.num_individuals != 0
1726-
assert ts.num_populations != 0
1727-
removed = self.verify(ts)
1728-
assert removed.num_sites == ts.num_sites
1729-
assert removed.num_individuals == ts.num_individuals
1730-
assert removed.num_populations == ts.num_populations
1731-
assert tsdate.preprocess_ts(ts, **{"filter_sites": True}).num_sites == 0
1732-
assert (
1733-
tsdate.preprocess_ts(ts, **{"filter_populations": True}).num_populations == 0
1734-
)
1735-
assert (
1736-
tsdate.preprocess_ts(ts, **{"filter_individuals": True}).num_individuals == 0
1737-
)
1738-
1739-
def test_no_intervals(self):
1740-
ts = utility_functions.two_tree_mutation_ts()
1741-
assert ts.tables.edges == self.verify(ts, remove_telomeres=False).tables.edges
1742-
assert ts.tables.edges == self.verify(ts, minimum_gap=0.05).tables.edges
1743-
1744-
def test_passed_intervals(self):
1745-
# Mostly we would not pass in user-defined intervals: this is mainly for testing
1746-
ts = utility_functions.single_tree_ts_n3() # No sites!
1747-
ts = tsdate.preprocess_ts(
1748-
ts, delete_intervals=[(0, 0.1), (0.5, ts.sequence_length)]
1749-
)
1750-
assert ts.num_edges > 1
1751-
assert np.allclose(ts.edges_left, 0.1)
1752-
assert np.allclose(ts.edges_right, 0.5)
1753-
1754-
def test_bad_delete_intervals(self):
1755-
ts = utility_functions.two_tree_mutation_ts()
1756-
with pytest.raises(ValueError, match="specify both"):
1757-
tsdate.preprocess_ts(ts, delete_intervals=[(0, 0.1)], minimum_gap=0.05)
1758-
with pytest.raises(ValueError, match="specify both"):
1759-
tsdate.preprocess_ts(ts, delete_intervals=[(0, 0.1)], remove_telomeres=True)
1760-
1761-
def test_delete_interval(self):
1762-
ts = utility_functions.ts_w_data_desert(40, 60, 100)
1763-
trimmed = self.verify(ts, minimum_gap=20, remove_telomeres=False)
1764-
lefts = trimmed.edges_left
1765-
rights = trimmed.edges_right
1766-
assert not np.any(np.logical_and(lefts > 41, lefts < 59))
1767-
assert not np.any(np.logical_and(rights > 41, rights < 59))
1768-
1769-
def test_remove_telomeres(self):
1770-
ts = utility_functions.ts_w_data_desert(0, 5, 100)
1771-
removed = self.verify(ts, minimum_gap=ts.get_sequence_length())
1772-
lefts = removed.tables.edges.left
1773-
rights = removed.tables.edges.right
1774-
assert not np.any(np.logical_and(lefts > 0, lefts < 4))
1775-
assert not np.any(np.logical_and(rights > 0, rights < 4))
1776-
ts = utility_functions.ts_w_data_desert(95, 100, 100)
1777-
removed = self.verify(ts, minimum_gap=ts.get_sequence_length())
1778-
lefts = removed.tables.edges.left
1779-
rights = removed.tables.edges.right
1780-
assert not np.any(np.logical_and(lefts > 96, lefts < 100))
1781-
assert not np.any(np.logical_and(rights > 96, rights < 100))
1782-
1783-
17841694
class TestNodeTimes:
17851695
"""
17861696
Test node_times works as expected.

tests/test_provenance.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def test_preprocess_defaults_recorded(self):
113113
preprocessed_ts = tsdate.preprocess_ts(ts)
114114
assert preprocessed_ts.num_provenances == num_provenances + 1
115115
rec = json.loads(preprocessed_ts.provenance(-1).record)
116-
assert rec["parameters"]["remove_telomeres"]
116+
assert rec["parameters"]["erase_flanks"]
117117
assert rec["parameters"]["minimum_gap"] == 1000000
118118
assert rec["parameters"]["delete_intervals"] == []
119119

@@ -124,8 +124,8 @@ def test_preprocess_interval_recorded(self):
124124
assert preprocessed_ts.num_provenances == num_provenances + 1
125125
rec = json.loads(preprocessed_ts.provenance(-1).record)
126126
assert rec["parameters"]["minimum_gap"] == 20
127-
assert rec["parameters"]["remove_telomeres"] is not None
128-
assert not rec["parameters"]["remove_telomeres"]
127+
assert rec["parameters"]["erase_flanks"] is not None
128+
assert not rec["parameters"]["erase_flanks"]
129129
deleted_intervals = rec["parameters"]["delete_intervals"]
130130
assert len(deleted_intervals) == 1
131131
assert deleted_intervals[0][0] < deleted_intervals[0][1]

tests/test_util.py

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import pytest
3232
import tsinfer
3333
import tskit
34+
import utility_functions
3435

3536
import tsdate
3637

@@ -203,6 +204,85 @@ def test_inferred(self):
203204

204205

205206
class TestPreprocessTs:
207+
def verify(self, ts, caplog, minimum_gap=None, erase_flanks=None, **kwargs):
208+
with caplog.at_level(logging.INFO):
209+
if minimum_gap is not None and erase_flanks is not None:
210+
ts = tsdate.preprocess_ts(
211+
ts, minimum_gap=minimum_gap, erase_flanks=erase_flanks
212+
)
213+
elif minimum_gap is not None and erase_flanks is None:
214+
ts = tsdate.preprocess_ts(ts, minimum_gap=minimum_gap)
215+
elif erase_flanks is not None and minimum_gap is None:
216+
ts = tsdate.preprocess_ts(ts, erase_flanks=erase_flanks)
217+
else:
218+
ts = tsdate.preprocess_ts(ts, **kwargs)
219+
220+
assert "Beginning preprocessing" in caplog.text
221+
return ts
222+
223+
def test_invariant_sites(self, caplog):
224+
# Test that invariant sites are not removed by default
225+
# (and similarly for unused individuals & populations)
226+
ts = utility_functions.site_no_mutations()
227+
assert ts.num_sites != 0
228+
assert ts.num_individuals != 0
229+
assert ts.num_populations != 0
230+
removed = self.verify(ts, caplog)
231+
assert removed.num_sites == ts.num_sites
232+
assert removed.num_individuals == ts.num_individuals
233+
assert removed.num_populations == ts.num_populations
234+
assert tsdate.preprocess_ts(ts, **{"filter_sites": True}).num_sites == 0
235+
assert (
236+
tsdate.preprocess_ts(ts, **{"filter_populations": True}).num_populations == 0
237+
)
238+
assert (
239+
tsdate.preprocess_ts(ts, **{"filter_individuals": True}).num_individuals == 0
240+
)
241+
242+
def test_no_intervals(self, caplog):
243+
ts = utility_functions.two_tree_mutation_ts()
244+
assert ts.tables.edges == self.verify(ts, caplog, erase_flanks=False).tables.edges
245+
assert ts.tables.edges == self.verify(ts, caplog, minimum_gap=0.05).tables.edges
246+
247+
def test_passed_intervals(self):
248+
# Mostly we would not pass in user-defined intervals: this is mainly for testing
249+
ts = utility_functions.single_tree_ts_n3() # No sites!
250+
ts = tsdate.preprocess_ts(
251+
ts, delete_intervals=[(0, 0.1), (0.5, ts.sequence_length)]
252+
)
253+
assert ts.num_edges > 1
254+
assert np.allclose(ts.edges_left, 0.1)
255+
assert np.allclose(ts.edges_right, 0.5)
256+
257+
def test_bad_delete_intervals(self):
258+
ts = utility_functions.two_tree_mutation_ts()
259+
with pytest.raises(ValueError, match="specify both"):
260+
tsdate.preprocess_ts(ts, delete_intervals=[(0, 0.1)], minimum_gap=0.05)
261+
with pytest.raises(ValueError, match="specify both"):
262+
tsdate.preprocess_ts(ts, delete_intervals=[(0, 0.1)], erase_flanks=True)
263+
264+
def test_delete_interval(self, caplog):
265+
ts = utility_functions.ts_w_data_desert(40, 60, 100)
266+
trimmed = self.verify(ts, caplog, minimum_gap=20, erase_flanks=False)
267+
lefts = trimmed.edges_left
268+
rights = trimmed.edges_right
269+
assert not np.any(np.logical_and(lefts > 41, lefts < 59))
270+
assert not np.any(np.logical_and(rights > 41, rights < 59))
271+
272+
def test_erase_flanks(self, caplog):
273+
ts = utility_functions.ts_w_data_desert(0, 5, 100)
274+
removed = self.verify(ts, caplog, minimum_gap=ts.get_sequence_length())
275+
lefts = removed.tables.edges.left
276+
rights = removed.tables.edges.right
277+
assert not np.any(np.logical_and(lefts > 0, lefts < 4))
278+
assert not np.any(np.logical_and(rights > 0, rights < 4))
279+
ts = utility_functions.ts_w_data_desert(95, 100, 100)
280+
removed = self.verify(ts, caplog, minimum_gap=ts.get_sequence_length())
281+
lefts = removed.tables.edges.left
282+
rights = removed.tables.edges.right
283+
assert not np.any(np.logical_and(lefts > 96, lefts < 100))
284+
assert not np.any(np.logical_and(rights > 96, rights < 100))
285+
206286
def test_no_sites(self):
207287
ts = tskit.Tree.generate_comb(3).tree_sequence
208288
with pytest.raises(ValueError, match="no sites"):
@@ -273,19 +353,25 @@ def test_record_provenance(self):
273353
ts = tsdate.preprocess_ts(ts, record_provenance=False)
274354
assert ts.num_provenances == num_provenances + 1
275355

276-
def test_trim_flanks(self):
356+
def test_no_erase_flanks(self):
277357
tables = tskit.Tree.generate_comb(3, span=100).tree_sequence.dump_tables()
278358
tables.sites.add_row(10, "A")
279359
tables.sites.add_row(90, "A")
280360
ts = tables.tree_sequence()
281361
assert ts.sequence_length == 100
282362
assert ts.num_trees == 1
283-
ts = tsdate.preprocess_ts(ts)
284-
assert ts.num_trees == 3
285-
assert ts.first().num_edges == 0
286-
assert ts.first().interval.right == 10 - 1
287-
assert ts.last().num_edges == 0
288-
assert ts.last().interval.left == 90 + 1
363+
for param_name in ("erase_flanks", "remove_telomeres"):
364+
params = {param_name: False}
365+
new_ts = tsdate.preprocess_ts(ts, **params)
366+
assert new_ts.num_trees == 1
367+
assert new_ts.sequence_length == 100
368+
369+
@pytest.mark.parametrize("bool1", [True, False])
370+
@pytest.mark.parametrize("bool2", [True, False])
371+
def test_erase_flanks_telomeres_combo(self, bool1, bool2):
372+
ts = tskit.Tree.generate_comb(3, span=100).tree_sequence
373+
with pytest.raises(ValueError, match="specify both"):
374+
tsdate.preprocess_ts(ts, erase_flanks=bool1, remove_telomeres=bool2)
289375

290376
def test_sim_example(self):
291377
# Test a larger example
@@ -310,8 +396,6 @@ def test_sim_example(self):
310396
# Next assumes no breakpoints before first site or after last
311397
assert ts.num_trees == num_trees + first_empty + last_empty
312398

313-
# TODO - test minimum_gap param
314-
315399

316400
class TestUnaryNodeCheck:
317401
def test_inferred(self):

tsdate/cli.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ def tsdate_cli_parser():
225225
default=1000000,
226226
)
227227
parser.add_argument(
228-
"--trim-telomeres",
228+
"--erase-flanks",
229+
"--trim_telomeres",
229230
type=bool,
230231
help=(
231232
"Should all material before the first site and after the "
@@ -313,7 +314,7 @@ def run_preprocess(args):
313314
except tskit.FileFormatError as ffe:
314315
error_exit(f"FileFormatError loading '{args.tree_sequence}: {ffe}")
315316
snipped_ts = tsdate.preprocess_ts(
316-
ts, minimum_gap=args.minimum_gap, remove_telomeres=args.trim_telomeres
317+
ts, minimum_gap=args.minimum_gap, erase_flanks=args.erase_flanks
317318
)
318319
snipped_ts.dump(args.output)
319320

0 commit comments

Comments
 (0)