Skip to content

Commit 3801e1d

Browse files
Update to version 0.0.6 (#12)
* fix plotting issue and score computation issue (#8) * Pipeline action (#9) * fix plotting issue and score computation issue * make pre-processing part of the first action * add auto sorting of exposure for computing the bounds for score * Make plot consistent and adding options (#10) * add plot of bprf and expand uncertainty to 95% * add option to omit the line that connect the reference and alternative exposures * default figure setting to true * Add star rating to summary file (#11) * Add star rating based on risk-outcome score to summary file for continuous and dichotomous pipelines * Update ROS bounds to be more interpretable * update version in pyproject.toml --------- Co-authored-by: n-gilbertson <148926375+n-gilbertson@users.noreply.github.com>
1 parent e971015 commit 3801e1d

File tree

6 files changed

+91
-22
lines changed

6 files changed

+91
-22
lines changed

data/continuous/settings.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,7 @@ default:
6060
# for monotonic curve this is usually false
6161
# only for j-shaped this can be true
6262
normalize_to_tmrel: false
63+
figure:
64+
# if we want to show the connection between the reference and alternative exposure
65+
show_ref: true
6366

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "bopforge"
7-
version = "0.0.5"
7+
version = "0.0.6"
88
description = "Pipelines for Burden of Proof (BoP) analyses"
99
readme = "REDME.md"
1010
requires-python = ">=3.10"
@@ -28,5 +28,5 @@ dichotomous_pipeline = "bopforge.dichotomous_pipeline.__main__:main"
2828
[tool.sphinx]
2929
project = "modrover"
3030
author = "IHME Math Sciences"
31-
copyright = "2023, IHME Math Sciences"
32-
version = "0.0.4"
31+
copyright = "2024, IHME Math Sciences"
32+
version = "0.0.6"

src/bopforge/continuous_pipeline/__main__.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def fit_signal_model(dataif: DataInterface) -> None:
5252
Data interface in charge of file reading and writing.
5353
5454
"""
55+
pre_processing(dataif)
5556
name = dataif.result.name
5657

5758
df = dataif.load_result(f"{name}.csv")
@@ -66,7 +67,13 @@ def fit_signal_model(dataif: DataInterface) -> None:
6667

6768
summary = functions.get_signal_model_summary(name, all_settings, df)
6869

69-
fig = functions.plot_signal_model(name, summary, df, signal_model)
70+
fig = functions.plot_signal_model(
71+
name,
72+
summary,
73+
df,
74+
signal_model,
75+
show_ref=all_settings["figure"]["show_ref"],
76+
)
7077

7178
dataif.dump_result(df, f"{name}.csv")
7279
dataif.dump_result(signal_model, "signal_model.pkl")
@@ -153,7 +160,14 @@ def fit_linear_model(dataif: DataInterface) -> None:
153160
settings, summary, signal_model
154161
)
155162

156-
fig = functions.plot_linear_model(name, summary, df, signal_model, linear_model)
163+
fig = functions.plot_linear_model(
164+
name,
165+
summary,
166+
df,
167+
signal_model,
168+
linear_model,
169+
show_ref=all_settings["figure"]["show_ref"],
170+
)
157171

158172
dataif.dump_result(linear_model, "linear_model.pkl")
159173
dataif.dump_result(summary, "summary.yaml")
@@ -214,7 +228,6 @@ def run(
214228

215229
np.random.seed(pair_settings["seed"])
216230
pair_dataif = DataInterface(result=pair_o_dir)
217-
pre_processing(pair_dataif)
218231
for action in actions:
219232
globals()[action](pair_dataif)
220233

src/bopforge/continuous_pipeline/functions.py

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,13 @@ def get_signal_model_summary(name: str, all_settings: dict, df: DataFrame) -> di
171171
"risk_unit": str(df.risk_unit.values[0]),
172172
}
173173
summary["risk_bounds"] = [float(risk_exposures.min()), float(risk_exposures.max())]
174+
risk_mean = np.vstack(
175+
[risk_exposures[:, [0, 1]].mean(axis=1), risk_exposures[:, [2, 3]].mean(axis=1)]
176+
)
177+
risk_mean.sort(axis=0)
174178
summary["risk_score_bounds"] = [
175-
float(np.quantile(risk_exposures[:, [0, 1]].mean(axis=1), 0.15)),
176-
float(np.quantile(risk_exposures[:, [2, 3]].mean(axis=1), 0.85)),
179+
float(np.quantile(risk_mean[0], 0.15)),
180+
float(np.quantile(risk_mean[1], 0.85)),
177181
]
178182
summary["normalize_to_tmrel"] = all_settings["complete_summary"]["score"][
179183
"normalize_to_tmrel"
@@ -379,7 +383,7 @@ def get_linear_model_summary(
379383
summary["beta"] = [float(beta_info[0]), float(beta_info[1])]
380384
summary["gamma"] = [float(gamma_info[0]), float(gamma_info[1])]
381385

382-
# compute the score
386+
# compute the score and add star rating
383387
risk = np.linspace(*summary["risk_bounds"], 100)
384388
signal = get_signal(signal_model, risk)
385389
beta_sd = np.sqrt(beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1])
@@ -408,10 +412,25 @@ def get_linear_model_summary(
408412
sign = np.sign(pred)
409413
if np.any(np.prod(inner_ui[:, index], axis=0) < 0):
410414
summary["score"] = float("nan")
415+
summary["star_rating"] = 0
411416
else:
412-
summary["score"] = float(
417+
score = float(
413418
((sign * burden_of_proof)[:, index].mean(axis=1)).min()
414419
)
420+
summary["score"] = score
421+
#Assign star rating based on ROS
422+
if np.isnan(score):
423+
summary["star_rating"] = 0
424+
elif score > np.log(1 + 0.85):
425+
summary["star_rating"] = 5
426+
elif score > np.log(1 + 0.50):
427+
summary["star_rating"] = 4
428+
elif score > np.log(1 + 0.15):
429+
summary["star_rating"] = 3
430+
elif score > 0:
431+
summary["star_rating"] = 2
432+
else:
433+
summary["star_rating"] = 1
415434

416435
# compute the publication bias
417436
index = df.is_outlier == 0
@@ -559,7 +578,11 @@ def get_quantiles(
559578

560579

561580
def plot_signal_model(
562-
name: str, summary: dict, df: DataFrame, signal_model: MRBeRT
581+
name: str,
582+
summary: dict,
583+
df: DataFrame,
584+
signal_model: MRBeRT,
585+
show_ref: bool = True,
563586
) -> Figure:
564587
"""Plot the signal model
565588
@@ -573,6 +596,8 @@ def plot_signal_model(
573596
Data frame contains training data.
574597
signal_model
575598
Fitted signal model for risk curve.
599+
show_ref
600+
Whether to show the reference line. Default is `True`.
576601
577602
Returns
578603
-------
@@ -584,7 +609,7 @@ def plot_signal_model(
584609
fig, ax = plt.subplots(figsize=(8, 5))
585610

586611
# plot data
587-
_plot_data(name, summary, df, ax, signal_model=signal_model)
612+
_plot_data(name, summary, df, ax, signal_model=signal_model, show_ref=show_ref)
588613

589614
# plot curve
590615
risk = np.linspace(*summary["risk_bounds"], 100)
@@ -602,6 +627,7 @@ def plot_linear_model(
602627
df: DataFrame,
603628
signal_model: MRBeRT,
604629
linear_model: MRBRT,
630+
show_ref: bool = True,
605631
) -> Figure:
606632
"""Plot the linear model
607633
@@ -617,6 +643,8 @@ def plot_linear_model(
617643
Fitted signal model for risk curve.
618644
linear_model
619645
Fitted linear model for risk curve.
646+
show_ref
647+
Whether to show the reference line. Default is `True`.
620648
621649
Returns
622650
-------
@@ -628,7 +656,7 @@ def plot_linear_model(
628656
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
629657

630658
# plot data
631-
_plot_data(name, summary, df, ax[0], signal_model, linear_model)
659+
_plot_data(name, summary, df, ax[0], signal_model, linear_model, show_ref=show_ref)
632660

633661
# plot curve and uncertainty
634662
beta = summary["beta"]
@@ -642,11 +670,11 @@ def plot_linear_model(
642670
pred = np.outer(
643671
np.array(
644672
[
645-
beta[0] - 1.645 * outer_beta_sd,
646-
beta[0] - 1.645 * inner_beta_sd,
673+
beta[0] - 1.96 * outer_beta_sd,
674+
beta[0] - 1.96 * inner_beta_sd,
647675
beta[0],
648-
beta[0] + 1.645 * inner_beta_sd,
649-
beta[0] + 1.645 * outer_beta_sd,
676+
beta[0] + 1.96 * inner_beta_sd,
677+
beta[0] + 1.96 * outer_beta_sd,
650678
]
651679
),
652680
signal,
@@ -655,9 +683,12 @@ def plot_linear_model(
655683
if summary["normalize_to_tmrel"]:
656684
pred -= pred[:, [np.argmin(pred[2])]]
657685

686+
log_bprf = pred[2] * (1.0 - 1.645 * outer_beta_sd / beta[0])
687+
658688
ax[0].plot(risk, pred[2], color="#008080")
659689
ax[0].fill_between(risk, pred[0], pred[4], color="gray", alpha=0.2)
660690
ax[0].fill_between(risk, pred[1], pred[3], color="gray", alpha=0.2)
691+
ax[0].plot(risk, log_bprf, color="red")
661692

662693
# plot funnel
663694
_plot_funnel(summary, df, ax[1])
@@ -672,6 +703,7 @@ def _plot_data(
672703
ax: Axes,
673704
signal_model: MRBeRT = None,
674705
linear_model: Optional[MRBRT] = None,
706+
show_ref: bool = True,
675707
) -> Axes:
676708
"""Plot data points
677709
@@ -690,6 +722,8 @@ def _plot_data(
690722
the points are plotted reference to original signal model. When linear
691723
model is provided, the points are plotted reference to the linear model
692724
risk curve.
725+
show_ref
726+
Whether to show the reference line. Default is `True`.
693727
694728
Returns
695729
-------
@@ -718,6 +752,9 @@ def _plot_data(
718752
if summary["normalize_to_tmrel"]:
719753
risk = np.linspace(*summary["risk_bounds"], 100)
720754
signal = get_signal(signal_model, risk)
755+
if linear_model is not None:
756+
signal *= linear_model.beta_soln[0]
757+
ref_ln_rr -= signal.min()
721758
alt_ln_rr -= signal.min()
722759

723760
# plot data points
@@ -738,8 +775,9 @@ def _plot_data(
738775
alpha=0.5,
739776
marker="x",
740777
)
741-
for x_0, y_0, x_1, y_1 in zip(alt_risk, alt_ln_rr, ref_risk, ref_ln_rr):
742-
ax.plot([x_0, x_1], [y_0, y_1], color="#008080", linewidth=0.5, alpha=0.5)
778+
if show_ref:
779+
for x_0, y_0, x_1, y_1 in zip(alt_risk, alt_ln_rr, ref_risk, ref_ln_rr):
780+
ax.plot([x_0, x_1], [y_0, y_1], color="#008080", linewidth=0.5, alpha=0.5)
743781

744782
# plot support lines
745783
ax.axhline(0.0, linewidth=1, linestyle="-", color="gray")

src/bopforge/dichotomous_pipeline/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def fit_signal_model(result_folder: Path) -> None:
5555
Data interface in charge of file reading and writing.
5656
5757
"""
58+
pre_processing(result_folder)
5859
dataif = DataInterface(result=result_folder)
5960
name = dataif.result.name
6061

@@ -204,7 +205,6 @@ def run(
204205
dataif.dump_o_dir(pair_settings, pair, "settings.yaml")
205206

206207
np.random.seed(pair_settings["seed"])
207-
pre_processing(pair_o_dir)
208208
for action in actions:
209209
globals()[action](pair_o_dir)
210210

src/bopforge/dichotomous_pipeline/functions.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,16 +235,31 @@ def get_linear_model_summary(
235235
summary["beta"] = [float(beta_info[0]), float(beta_info[1])]
236236
summary["gamma"] = [float(gamma_info[0]), float(gamma_info[1])]
237237

238-
# compute the score
238+
# compute the score and add star rating
239239
beta_sd = np.sqrt(beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1])
240240
sign = np.sign(beta_info[0])
241241
inner_ui = beta_info[0] - sign * 1.96 * beta_info[1]
242242
burden_of_proof = beta_info[0] - sign * 1.645 * beta_sd
243243

244244
if inner_ui * beta_info[0] < 0:
245245
summary["score"] = float("nan")
246+
summary["star_rating"] = 0
246247
else:
247-
summary["score"] = float(0.5 * sign * burden_of_proof)
248+
score = float(0.5 * sign * burden_of_proof)
249+
summary["score"] = score
250+
#Assign star rating based on ROS
251+
if np.isnan(score):
252+
summary["star_rating"] = 0
253+
elif score > np.log(1 + 0.85):
254+
summary["star_rating"] = 5
255+
elif score > np.log(1 + 0.50):
256+
summary["star_rating"] = 4
257+
elif score > np.log(1 + 0.15):
258+
summary["star_rating"] = 3
259+
elif score > 0:
260+
summary["star_rating"] = 2
261+
else:
262+
summary["star_rating"] = 1
248263

249264
# compute the publication bias
250265
index = df.is_outlier == 0

0 commit comments

Comments
 (0)