@@ -58,7 +58,9 @@ def get_signal_model(settings: dict, df: DataFrame) -> MRBeRT:
58
58
59
59
for arg in ["knot_bounds" , "min_dist" ]:
60
60
if not np .isscalar (settings ["knots_samples" ][arg ]):
61
- settings ["knots_samples" ][arg ] = np .asarray (settings ["knots_samples" ][arg ])
61
+ settings ["knots_samples" ][arg ] = np .asarray (
62
+ settings ["knots_samples" ][arg ]
63
+ )
62
64
settings ["knots_samples" ] = {
63
65
** dict (
64
66
num_knots = len (settings ["cov_model" ]["spline_knots" ]) - 2 ,
@@ -138,7 +140,9 @@ def convert_bc_to_em(df: DataFrame, signal_model: MRBeRT) -> DataFrame:
138
140
return df
139
141
140
142
141
- def get_signal_model_summary (name : str , all_settings : dict , df : DataFrame ) -> dict :
143
+ def get_signal_model_summary (
144
+ name : str , all_settings : dict , df : DataFrame
145
+ ) -> dict :
142
146
"""Create signal model summary.
143
147
144
148
Parameters
@@ -170,9 +174,15 @@ def get_signal_model_summary(name: str, all_settings: dict, df: DataFrame) -> di
170
174
"risk_type" : str (df .risk_type .values [0 ]),
171
175
"risk_unit" : str (df .risk_unit .values [0 ]),
172
176
}
173
- summary ["risk_bounds" ] = [float (risk_exposures .min ()), float (risk_exposures .max ())]
177
+ summary ["risk_bounds" ] = [
178
+ float (risk_exposures .min ()),
179
+ float (risk_exposures .max ()),
180
+ ]
174
181
risk_mean = np .vstack (
175
- [risk_exposures [:, [0 , 1 ]].mean (axis = 1 ), risk_exposures [:, [2 , 3 ]].mean (axis = 1 )]
182
+ [
183
+ risk_exposures [:, [0 , 1 ]].mean (axis = 1 ),
184
+ risk_exposures [:, [2 , 3 ]].mean (axis = 1 ),
185
+ ]
176
186
)
177
187
risk_mean .sort (axis = 0 )
178
188
summary ["risk_score_bounds" ] = [
@@ -246,7 +256,9 @@ def get_cov_finder(settings: dict, cov_finder_linear_model: MRBRT) -> CovFinder:
246
256
pre_selected_covs = settings ["cov_finder" ]["pre_selected_covs" ]
247
257
if isinstance (pre_selected_covs , str ):
248
258
pre_selected_covs = [pre_selected_covs ]
249
- pre_selected_covs = [col .replace ("cov_" , "em_" ) for col in pre_selected_covs ]
259
+ pre_selected_covs = [
260
+ col .replace ("cov_" , "em_" ) for col in pre_selected_covs
261
+ ]
250
262
if "signal" not in pre_selected_covs :
251
263
pre_selected_covs .append ("signal" )
252
264
settings ["cov_finder" ]["pre_selected_covs" ] = pre_selected_covs
@@ -296,7 +308,9 @@ def get_cov_finder_result(
296
308
"""
297
309
beta_info = get_beta_info (cov_finder_linear_model , cov_name = "signal" )
298
310
selected_covs = [
299
- cov_name for cov_name in cov_finder .selected_covs if cov_name != "signal"
311
+ cov_name
312
+ for cov_name in cov_finder .selected_covs
313
+ if cov_name != "signal"
300
314
]
301
315
cov_finder_result = {
302
316
"beta_sd" : float (beta_info [1 ] * 0.1 ),
@@ -331,14 +345,17 @@ def get_linear_model(df: DataFrame, cov_finder_result: dict) -> MRBRT:
331
345
col_data_id = "seq" ,
332
346
)
333
347
cov_models = [
334
- LinearCovModel ("signal" , use_re = False , prior_beta_uniform = [0.0 , np .inf ]),
348
+ LinearCovModel (
349
+ "signal" , use_re = False , prior_beta_uniform = [0.0 , np .inf ]
350
+ ),
335
351
LinearCovModel ("re_signal" , use_re = True , prior_beta_uniform = [0.0 , 0.0 ]),
336
352
LinearCovModel ("intercept" , use_re = True , prior_beta_uniform = [0.0 , 0.0 ]),
337
353
]
338
354
for cov_name in cov_finder_result ["selected_covs" ]:
339
355
cov_models .append (
340
356
LinearCovModel (
341
- cov_name , prior_beta_gaussian = [0.0 , cov_finder_result ["beta_sd" ]]
357
+ cov_name ,
358
+ prior_beta_gaussian = [0.0 , cov_finder_result ["beta_sd" ]],
342
359
)
343
360
)
344
361
model = MRBRT (data , cov_models )
@@ -414,11 +431,9 @@ def get_linear_model_summary(
414
431
summary ["score" ] = float ("nan" )
415
432
summary ["star_rating" ] = 0
416
433
else :
417
- score = float (
418
- ((sign * burden_of_proof )[:, index ].mean (axis = 1 )).min ()
419
- )
434
+ score = float (((sign * burden_of_proof )[:, index ].mean (axis = 1 )).min ())
420
435
summary ["score" ] = score
421
- #Assign star rating based on ROS
436
+ # Assign star rating based on ROS
422
437
if np .isnan (score ):
423
438
summary ["star_rating" ] = 0
424
439
elif score > np .log (1 + 0.85 ):
@@ -436,7 +451,8 @@ def get_linear_model_summary(
436
451
index = df .is_outlier == 0
437
452
residual = df .ln_rr .values [index ] - df .signal .values [index ] * beta_info [0 ]
438
453
residual_sd = np .sqrt (
439
- df .ln_rr_se .values [index ] ** 2 + df .re_signal .values [index ] ** 2 * gamma_info [0 ]
454
+ df .ln_rr_se .values [index ] ** 2
455
+ + df .re_signal .values [index ] ** 2 * gamma_info [0 ]
440
456
)
441
457
weighted_residual = residual / residual_sd
442
458
r_mean = weighted_residual .mean ()
@@ -485,20 +501,26 @@ def get_draws(
485
501
summary ["beta" ][1 ] ** 2 + summary ["gamma" ][0 ] + 2 * summary ["gamma" ][1 ]
486
502
)
487
503
inner_beta_samples = np .random .normal (
488
- loc = summary ["beta" ][0 ], scale = inner_beta_sd , size = settings ["draws" ]["num_draws" ]
504
+ loc = summary ["beta" ][0 ],
505
+ scale = inner_beta_sd ,
506
+ size = settings ["draws" ]["num_draws" ],
489
507
)
490
508
outer_beta_samples = np .random .normal (
491
- loc = summary ["beta" ][0 ], scale = outer_beta_sd , size = settings ["draws" ]["num_draws" ]
509
+ loc = summary ["beta" ][0 ],
510
+ scale = outer_beta_sd ,
511
+ size = settings ["draws" ]["num_draws" ],
492
512
)
493
513
inner_draws = np .outer (signal , inner_beta_samples )
494
514
outer_draws = np .outer (signal , outer_beta_samples )
495
515
df_inner_draws = pd .DataFrame (
496
516
np .hstack ([risk [:, None ], inner_draws ]),
497
- columns = ["risk" ] + [f"draw_{ i } " for i in range (settings ["draws" ]["num_draws" ])],
517
+ columns = ["risk" ]
518
+ + [f"draw_{ i } " for i in range (settings ["draws" ]["num_draws" ])],
498
519
)
499
520
df_outer_draws = pd .DataFrame (
500
521
np .hstack ([risk [:, None ], outer_draws ]),
501
- columns = ["risk" ] + [f"draw_{ i } " for i in range (settings ["draws" ]["num_draws" ])],
522
+ columns = ["risk" ]
523
+ + [f"draw_{ i } " for i in range (settings ["draws" ]["num_draws" ])],
502
524
)
503
525
504
526
return df_inner_draws , df_outer_draws
@@ -609,7 +631,9 @@ def plot_signal_model(
609
631
fig , ax = plt .subplots (figsize = (8 , 5 ))
610
632
611
633
# plot data
612
- _plot_data (name , summary , df , ax , signal_model = signal_model , show_ref = show_ref )
634
+ _plot_data (
635
+ name , summary , df , ax , signal_model = signal_model , show_ref = show_ref
636
+ )
613
637
614
638
# plot curve
615
639
risk = np .linspace (* summary ["risk_bounds" ], 100 )
@@ -656,7 +680,9 @@ def plot_linear_model(
656
680
fig , ax = plt .subplots (1 , 2 , figsize = (16 , 5 ))
657
681
658
682
# plot data
659
- _plot_data (name , summary , df , ax [0 ], signal_model , linear_model , show_ref = show_ref )
683
+ _plot_data (
684
+ name , summary , df , ax [0 ], signal_model , linear_model , show_ref = show_ref
685
+ )
660
686
661
687
# plot curve and uncertainty
662
688
beta = summary ["beta" ]
@@ -737,8 +763,12 @@ def _plot_data(
737
763
ref_ln_rr = signal_model .predict (
738
764
MRData (
739
765
covs = {
740
- "ref_risk_lower" : np .repeat (summary ["risk_bounds" ][0 ], ref_risk .size ),
741
- "ref_risk_upper" : np .repeat (summary ["risk_bounds" ][0 ], ref_risk .size ),
766
+ "ref_risk_lower" : np .repeat (
767
+ summary ["risk_bounds" ][0 ], ref_risk .size
768
+ ),
769
+ "ref_risk_upper" : np .repeat (
770
+ summary ["risk_bounds" ][0 ], ref_risk .size
771
+ ),
742
772
"alt_risk_lower" : ref_risk ,
743
773
"alt_risk_upper" : ref_risk ,
744
774
}
@@ -777,7 +807,13 @@ def _plot_data(
777
807
)
778
808
if show_ref :
779
809
for x_0 , y_0 , x_1 , y_1 in zip (alt_risk , alt_ln_rr , ref_risk , ref_ln_rr ):
780
- ax .plot ([x_0 , x_1 ], [y_0 , y_1 ], color = "#008080" , linewidth = 0.5 , alpha = 0.5 )
810
+ ax .plot (
811
+ [x_0 , x_1 ],
812
+ [y_0 , y_1 ],
813
+ color = "#008080" ,
814
+ linewidth = 0.5 ,
815
+ alpha = 0.5 ,
816
+ )
781
817
782
818
# plot support lines
783
819
ax .axhline (0.0 , linewidth = 1 , linestyle = "-" , color = "gray" )
@@ -818,15 +854,21 @@ def _plot_funnel(
818
854
# add residual information
819
855
beta , gamma = summary ["beta" ], summary ["gamma" ]
820
856
residual = df .ln_rr .values - df .signal .values * beta [0 ]
821
- residual_sd = np .sqrt (df .ln_rr_se .values ** 2 + df .re_signal .values ** 2 * gamma [0 ])
857
+ residual_sd = np .sqrt (
858
+ df .ln_rr_se .values ** 2 + df .re_signal .values ** 2 * gamma [0 ]
859
+ )
822
860
823
861
# plot funnel
824
862
index = df .is_outlier == 1
825
863
sd_max = residual_sd .max () * 1.1
826
864
827
865
ax .set_ylim (sd_max , 0.0 )
828
- ax .scatter (residual , residual_sd , color = "#008080" , alpha = 0.5 , edgecolor = "none" )
829
- ax .scatter (residual [index ], residual_sd [index ], color = "red" , alpha = 0.5 , marker = "x" )
866
+ ax .scatter (
867
+ residual , residual_sd , color = "#008080" , alpha = 0.5 , edgecolor = "none"
868
+ )
869
+ ax .scatter (
870
+ residual [index ], residual_sd [index ], color = "red" , alpha = 0.5 , marker = "x"
871
+ )
830
872
ax .fill_betweenx (
831
873
[0.0 , sd_max ],
832
874
[0.0 , - 1.96 * sd_max ],
0 commit comments