Skip to content

Commit cdf4b45

Browse files
committed
Add comments
1 parent a59841d commit cdf4b45

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

python/tests/test_beagle.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
292292
return (sm, fwd_hap_probs, bwd_hap_probs)
293293

294294

295-
def compute_interpolated_haplotype_matrix(
295+
def interpolate_haplotype_probability_matrix(
296296
fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
297297
):
298298
"""
@@ -365,47 +365,65 @@ def run_beagle(ref_h, query_h, pos):
365365
"""
366366
Run the BEAGLE 4.1 imputation algorithm.
367367
368+
`ref_h` and `query_h` span all genotyped and imputed markers.
369+
368370
:param numpy.ndarray ref_h: Reference haplotypes.
369371
:param numpy.ndarray query_h: One query haplotype.
370-
:param numpy.ndarray pos: Site positions.
372+
:param numpy.ndarray pos: Site positions of all the markers.
371373
:return: MAP alleles at imputed markers in the query haplotype.
372374
:rtype: numpy.ndarray
373375
"""
374376
assert ref_h.shape[0] == len(pos)
375377
assert query_h.shape[0] == len(pos)
378+
# Index of genotyped markers in the query haplotype
376379
genotyped_pos_idx = np.where(query_h != -1)[0]
380+
# Index of imputed markers in the query haplotype
377381
imputed_pos_idx = np.where(query_h == -1)[0]
378382
assert len(genotyped_pos_idx) > 0
379383
assert len(imputed_pos_idx) > 0
384+
# Site positions of genotyped markers
380385
genotyped_pos = pos[genotyped_pos_idx]
386+
# Site positions of imputed markers
381387
imputed_pos = pos[imputed_pos_idx]
382388
m = len(genotyped_pos)
383389
x = len(imputed_pos)
384390
assert m + x == len(pos)
385391
h = ref_h.shape[1]
392+
# Subset the reference haplotypes to genotyped markers
386393
ref_h_genotyped = ref_h[genotyped_pos_idx, :]
387394
assert ref_h_genotyped.shape == (m, h)
395+
# Subset the query haplotype to genotyped markers
388396
query_h_genotyped = query_h[genotyped_pos_idx]
389397
assert len(query_h_genotyped) == m
398+
# Set mismatch probabilities at genotyped markers
390399
mu = get_mismatch_prob(genotyped_pos)
391400
assert len(mu) == m
401+
# Set switch probabilities at genotyped markers
392402
rho = get_switch_prob(genotyped_pos, h, ne=10) # Small ref. panel
393403
assert len(rho) == m
404+
# Compute forward probability matrix over genotyped markers
394405
fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
395406
assert fm.shape == (m, h)
407+
# Compute backward probability matrix over genotyped markers
396408
bm = compute_backward_probability_matrix(
397409
ref_h_genotyped, query_h_genotyped, rho, mu
398410
)
399411
assert bm.shape == (m, h)
400-
_, fwd_hap_probs, bwd_hap_probs = compute_state_probability_matrix(
412+
# Compute HMM state probability matrix over genotyped markers
413+
# and forward and backward haplotype probability matrices
414+
sm, fwd_hap_probs, bwd_hap_probs = compute_state_probability_matrix(
401415
fm, bm, ref_h_genotyped, query_h_genotyped, rho, mu
402416
)
417+
assert sm.shape == (m, h) # sm not used further
403418
assert fwd_hap_probs.shape == (m, 2)
404419
assert bwd_hap_probs.shape == (m, 2)
405-
i_hap_probs = compute_interpolated_haplotype_matrix(
420+
# Interpolate haplotype probabilities
421+
# from genotyped markers to imputed markers
422+
i_hap_probs = interpolate_haplotype_probability_matrix(
406423
fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
407424
)
408425
assert i_hap_probs.shape == (x, 2)
426+
# Get MAP alleles at imputed markers
409427
imputed_alleles = get_map_alleles(i_hap_probs)
410428
assert len(imputed_alleles) == x
411429
return imputed_alleles

0 commit comments

Comments
 (0)