@@ -292,7 +292,7 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
292
292
return (sm , fwd_hap_probs , bwd_hap_probs )
293
293
294
294
295
- def compute_interpolated_haplotype_matrix (
295
+ def interpolate_haplotype_probability_matrix (
296
296
fwd_hap_probs , bwd_hap_probs , genotyped_pos , imputed_pos
297
297
):
298
298
"""
@@ -365,47 +365,65 @@ def run_beagle(ref_h, query_h, pos):
365
365
"""
366
366
Run the BEAGLE 4.1 imputation algorithm.
367
367
368
+ `ref_h` and `query_h` span all genotyped and imputed markers.
369
+
368
370
:param numpy.ndarray ref_h: Reference haplotypes.
369
371
:param numpy.ndarray query_h: One query haplotype.
370
- :param numpy.ndarray pos: Site positions.
372
+ :param numpy.ndarray pos: Site positions of all the markers .
371
373
:return: MAP alleles at imputed markers in the query haplotype.
372
374
:rtype: numpy.ndarray
373
375
"""
374
376
assert ref_h .shape [0 ] == len (pos )
375
377
assert query_h .shape [0 ] == len (pos )
378
+ # Index of genotyped markers in the query haplotype
376
379
genotyped_pos_idx = np .where (query_h != - 1 )[0 ]
380
+ # Index of imputed markers in the query haplotype
377
381
imputed_pos_idx = np .where (query_h == - 1 )[0 ]
378
382
assert len (genotyped_pos_idx ) > 0
379
383
assert len (imputed_pos_idx ) > 0
384
+ # Site positions of genotyped markers
380
385
genotyped_pos = pos [genotyped_pos_idx ]
386
+ # Site positions of imputed markers
381
387
imputed_pos = pos [imputed_pos_idx ]
382
388
m = len (genotyped_pos )
383
389
x = len (imputed_pos )
384
390
assert m + x == len (pos )
385
391
h = ref_h .shape [1 ]
392
+ # Subset the reference haplotypes to genotyped markers
386
393
ref_h_genotyped = ref_h [genotyped_pos_idx , :]
387
394
assert ref_h_genotyped .shape == (m , h )
395
+ # Subset the query haplotype to genotyped markers
388
396
query_h_genotyped = query_h [genotyped_pos_idx ]
389
397
assert len (query_h_genotyped ) == m
398
+ # Set mismatch probabilities at genotyped markers
390
399
mu = get_mismatch_prob (genotyped_pos )
391
400
assert len (mu ) == m
401
+ # Set switch probabilities at genotyped markers
392
402
rho = get_switch_prob (genotyped_pos , h , ne = 10 ) # Small ref. panel
393
403
assert len (rho ) == m
404
+ # Compute forward probability matrix over genotyped markers
394
405
fm = compute_forward_probability_matrix (ref_h_genotyped , query_h_genotyped , rho , mu )
395
406
assert fm .shape == (m , h )
407
+ # Compute backward probability matrix over genotyped markers
396
408
bm = compute_backward_probability_matrix (
397
409
ref_h_genotyped , query_h_genotyped , rho , mu
398
410
)
399
411
assert bm .shape == (m , h )
400
- _ , fwd_hap_probs , bwd_hap_probs = compute_state_probability_matrix (
412
+ # Compute HMM state probability matrix over genotyped markers
413
+ # and forward and backward haplotype probability matrices
414
+ sm , fwd_hap_probs , bwd_hap_probs = compute_state_probability_matrix (
401
415
fm , bm , ref_h_genotyped , query_h_genotyped , rho , mu
402
416
)
417
+ assert sm .shape == (m , h ) # sm not used further
403
418
assert fwd_hap_probs .shape == (m , 2 )
404
419
assert bwd_hap_probs .shape == (m , 2 )
405
- i_hap_probs = compute_interpolated_haplotype_matrix (
420
+ # Interpolate haplotype probabilities
421
+ # from genotype markers to imputed markers
422
+ i_hap_probs = interpolate_haplotype_probability_matrix (
406
423
fwd_hap_probs , bwd_hap_probs , genotyped_pos , imputed_pos
407
424
)
408
425
assert i_hap_probs .shape == (x , 2 )
426
+ # Get MAP alleles at imputed markers
409
427
imputed_alleles = get_map_alleles (i_hap_probs )
410
428
assert len (imputed_alleles ) == x
411
429
return imputed_alleles
0 commit comments