Skip to content

Commit 526e4d2

Browse files
committed
bopcat dichot version with all files tracked
1 parent 8e972f1 commit 526e4d2

File tree

4 files changed

+120
-43
lines changed

4 files changed

+120
-43
lines changed

src/bopforge/continuous_pipeline/functions.py

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ def get_signal_model(settings: dict, df: DataFrame) -> MRBeRT:
5858

5959
for arg in ["knot_bounds", "min_dist"]:
6060
if not np.isscalar(settings["knots_samples"][arg]):
61-
settings["knots_samples"][arg] = np.asarray(settings["knots_samples"][arg])
61+
settings["knots_samples"][arg] = np.asarray(
62+
settings["knots_samples"][arg]
63+
)
6264
settings["knots_samples"] = {
6365
**dict(
6466
num_knots=len(settings["cov_model"]["spline_knots"]) - 2,
@@ -138,7 +140,9 @@ def convert_bc_to_em(df: DataFrame, signal_model: MRBeRT) -> DataFrame:
138140
return df
139141

140142

141-
def get_signal_model_summary(name: str, all_settings: dict, df: DataFrame) -> dict:
143+
def get_signal_model_summary(
144+
name: str, all_settings: dict, df: DataFrame
145+
) -> dict:
142146
"""Create signal model summary.
143147
144148
Parameters
@@ -170,9 +174,15 @@ def get_signal_model_summary(name: str, all_settings: dict, df: DataFrame) -> di
170174
"risk_type": str(df.risk_type.values[0]),
171175
"risk_unit": str(df.risk_unit.values[0]),
172176
}
173-
summary["risk_bounds"] = [float(risk_exposures.min()), float(risk_exposures.max())]
177+
summary["risk_bounds"] = [
178+
float(risk_exposures.min()),
179+
float(risk_exposures.max()),
180+
]
174181
risk_mean = np.vstack(
175-
[risk_exposures[:, [0, 1]].mean(axis=1), risk_exposures[:, [2, 3]].mean(axis=1)]
182+
[
183+
risk_exposures[:, [0, 1]].mean(axis=1),
184+
risk_exposures[:, [2, 3]].mean(axis=1),
185+
]
176186
)
177187
risk_mean.sort(axis=0)
178188
summary["risk_score_bounds"] = [
@@ -246,7 +256,9 @@ def get_cov_finder(settings: dict, cov_finder_linear_model: MRBRT) -> CovFinder:
246256
pre_selected_covs = settings["cov_finder"]["pre_selected_covs"]
247257
if isinstance(pre_selected_covs, str):
248258
pre_selected_covs = [pre_selected_covs]
249-
pre_selected_covs = [col.replace("cov_", "em_") for col in pre_selected_covs]
259+
pre_selected_covs = [
260+
col.replace("cov_", "em_") for col in pre_selected_covs
261+
]
250262
if "signal" not in pre_selected_covs:
251263
pre_selected_covs.append("signal")
252264
settings["cov_finder"]["pre_selected_covs"] = pre_selected_covs
@@ -296,7 +308,9 @@ def get_cov_finder_result(
296308
"""
297309
beta_info = get_beta_info(cov_finder_linear_model, cov_name="signal")
298310
selected_covs = [
299-
cov_name for cov_name in cov_finder.selected_covs if cov_name != "signal"
311+
cov_name
312+
for cov_name in cov_finder.selected_covs
313+
if cov_name != "signal"
300314
]
301315
cov_finder_result = {
302316
"beta_sd": float(beta_info[1] * 0.1),
@@ -331,14 +345,17 @@ def get_linear_model(df: DataFrame, cov_finder_result: dict) -> MRBRT:
331345
col_data_id="seq",
332346
)
333347
cov_models = [
334-
LinearCovModel("signal", use_re=False, prior_beta_uniform=[0.0, np.inf]),
348+
LinearCovModel(
349+
"signal", use_re=False, prior_beta_uniform=[0.0, np.inf]
350+
),
335351
LinearCovModel("re_signal", use_re=True, prior_beta_uniform=[0.0, 0.0]),
336352
LinearCovModel("intercept", use_re=True, prior_beta_uniform=[0.0, 0.0]),
337353
]
338354
for cov_name in cov_finder_result["selected_covs"]:
339355
cov_models.append(
340356
LinearCovModel(
341-
cov_name, prior_beta_gaussian=[0.0, cov_finder_result["beta_sd"]]
357+
cov_name,
358+
prior_beta_gaussian=[0.0, cov_finder_result["beta_sd"]],
342359
)
343360
)
344361
model = MRBRT(data, cov_models)
@@ -414,11 +431,9 @@ def get_linear_model_summary(
414431
summary["score"] = float("nan")
415432
summary["star_rating"] = 0
416433
else:
417-
score = float(
418-
((sign * burden_of_proof)[:, index].mean(axis=1)).min()
419-
)
434+
score = float(((sign * burden_of_proof)[:, index].mean(axis=1)).min())
420435
summary["score"] = score
421-
#Assign star rating based on ROS
436+
# Assign star rating based on ROS
422437
if np.isnan(score):
423438
summary["star_rating"] = 0
424439
elif score > np.log(1 + 0.85):
@@ -436,7 +451,8 @@ def get_linear_model_summary(
436451
index = df.is_outlier == 0
437452
residual = df.ln_rr.values[index] - df.signal.values[index] * beta_info[0]
438453
residual_sd = np.sqrt(
439-
df.ln_rr_se.values[index] ** 2 + df.re_signal.values[index] ** 2 * gamma_info[0]
454+
df.ln_rr_se.values[index] ** 2
455+
+ df.re_signal.values[index] ** 2 * gamma_info[0]
440456
)
441457
weighted_residual = residual / residual_sd
442458
r_mean = weighted_residual.mean()
@@ -485,20 +501,26 @@ def get_draws(
485501
summary["beta"][1] ** 2 + summary["gamma"][0] + 2 * summary["gamma"][1]
486502
)
487503
inner_beta_samples = np.random.normal(
488-
loc=summary["beta"][0], scale=inner_beta_sd, size=settings["draws"]["num_draws"]
504+
loc=summary["beta"][0],
505+
scale=inner_beta_sd,
506+
size=settings["draws"]["num_draws"],
489507
)
490508
outer_beta_samples = np.random.normal(
491-
loc=summary["beta"][0], scale=outer_beta_sd, size=settings["draws"]["num_draws"]
509+
loc=summary["beta"][0],
510+
scale=outer_beta_sd,
511+
size=settings["draws"]["num_draws"],
492512
)
493513
inner_draws = np.outer(signal, inner_beta_samples)
494514
outer_draws = np.outer(signal, outer_beta_samples)
495515
df_inner_draws = pd.DataFrame(
496516
np.hstack([risk[:, None], inner_draws]),
497-
columns=["risk"] + [f"draw_{i}" for i in range(settings["draws"]["num_draws"])],
517+
columns=["risk"]
518+
+ [f"draw_{i}" for i in range(settings["draws"]["num_draws"])],
498519
)
499520
df_outer_draws = pd.DataFrame(
500521
np.hstack([risk[:, None], outer_draws]),
501-
columns=["risk"] + [f"draw_{i}" for i in range(settings["draws"]["num_draws"])],
522+
columns=["risk"]
523+
+ [f"draw_{i}" for i in range(settings["draws"]["num_draws"])],
502524
)
503525

504526
return df_inner_draws, df_outer_draws
@@ -609,7 +631,9 @@ def plot_signal_model(
609631
fig, ax = plt.subplots(figsize=(8, 5))
610632

611633
# plot data
612-
_plot_data(name, summary, df, ax, signal_model=signal_model, show_ref=show_ref)
634+
_plot_data(
635+
name, summary, df, ax, signal_model=signal_model, show_ref=show_ref
636+
)
613637

614638
# plot curve
615639
risk = np.linspace(*summary["risk_bounds"], 100)
@@ -656,7 +680,9 @@ def plot_linear_model(
656680
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
657681

658682
# plot data
659-
_plot_data(name, summary, df, ax[0], signal_model, linear_model, show_ref=show_ref)
683+
_plot_data(
684+
name, summary, df, ax[0], signal_model, linear_model, show_ref=show_ref
685+
)
660686

661687
# plot curve and uncertainty
662688
beta = summary["beta"]
@@ -737,8 +763,12 @@ def _plot_data(
737763
ref_ln_rr = signal_model.predict(
738764
MRData(
739765
covs={
740-
"ref_risk_lower": np.repeat(summary["risk_bounds"][0], ref_risk.size),
741-
"ref_risk_upper": np.repeat(summary["risk_bounds"][0], ref_risk.size),
766+
"ref_risk_lower": np.repeat(
767+
summary["risk_bounds"][0], ref_risk.size
768+
),
769+
"ref_risk_upper": np.repeat(
770+
summary["risk_bounds"][0], ref_risk.size
771+
),
742772
"alt_risk_lower": ref_risk,
743773
"alt_risk_upper": ref_risk,
744774
}
@@ -777,7 +807,13 @@ def _plot_data(
777807
)
778808
if show_ref:
779809
for x_0, y_0, x_1, y_1 in zip(alt_risk, alt_ln_rr, ref_risk, ref_ln_rr):
780-
ax.plot([x_0, x_1], [y_0, y_1], color="#008080", linewidth=0.5, alpha=0.5)
810+
ax.plot(
811+
[x_0, x_1],
812+
[y_0, y_1],
813+
color="#008080",
814+
linewidth=0.5,
815+
alpha=0.5,
816+
)
781817

782818
# plot support lines
783819
ax.axhline(0.0, linewidth=1, linestyle="-", color="gray")
@@ -818,15 +854,21 @@ def _plot_funnel(
818854
# add residual information
819855
beta, gamma = summary["beta"], summary["gamma"]
820856
residual = df.ln_rr.values - df.signal.values * beta[0]
821-
residual_sd = np.sqrt(df.ln_rr_se.values**2 + df.re_signal.values**2 * gamma[0])
857+
residual_sd = np.sqrt(
858+
df.ln_rr_se.values**2 + df.re_signal.values**2 * gamma[0]
859+
)
822860

823861
# plot funnel
824862
index = df.is_outlier == 1
825863
sd_max = residual_sd.max() * 1.1
826864

827865
ax.set_ylim(sd_max, 0.0)
828-
ax.scatter(residual, residual_sd, color="#008080", alpha=0.5, edgecolor="none")
829-
ax.scatter(residual[index], residual_sd[index], color="red", alpha=0.5, marker="x")
866+
ax.scatter(
867+
residual, residual_sd, color="#008080", alpha=0.5, edgecolor="none"
868+
)
869+
ax.scatter(
870+
residual[index], residual_sd[index], color="red", alpha=0.5, marker="x"
871+
)
830872
ax.fill_betweenx(
831873
[0.0, sd_max],
832874
[0.0, -1.96 * sd_max],

src/bopforge/dichotomous_pipeline/__main__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ def fit_linear_model(result_folder: Path) -> None:
144144

145145
df_inner_draws, df_outer_draws = functions.get_draws(settings, summary)
146146

147-
df_inner_quantiles, df_outer_quantiles = functions.get_quantiles(settings, summary)
147+
df_inner_quantiles, df_outer_quantiles = functions.get_quantiles(
148+
settings, summary
149+
)
148150

149151
fig = functions.plot_linear_model(summary, df)
150152

@@ -212,7 +214,11 @@ def run(
212214
def main(args=None) -> None:
213215
parser = ArgumentParser(description="Dichotomous burden of proof pipeline.")
214216
parser.add_argument(
215-
"-i", "--input", type=os.path.abspath, required=True, help="Input data folder"
217+
"-i",
218+
"--input",
219+
type=os.path.abspath,
220+
required=True,
221+
help="Input data folder",
216222
)
217223
parser.add_argument(
218224
"-o",

src/bopforge/dichotomous_pipeline/functions.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import matplotlib.pyplot as plt
22
import numpy as np
33
import pandas as pd
4-
from bopforge.utils import get_beta_info, get_gamma_info
54
from matplotlib.pyplot import Axes, Figure
65
from mrtool import MRBRT, CovFinder, LinearCovModel, MRData
76
from pandas import DataFrame
87
from scipy.stats import norm
98

9+
from bopforge.utils import get_beta_info, get_gamma_info
10+
1011

1112
def get_signal_model(settings: dict, df: DataFrame) -> MRBRT:
1213
"""Create signal model for outliers identification and covariate selection
@@ -117,7 +118,8 @@ def get_cov_finder(settings: dict, cov_finder_linear_model: MRBRT) -> CovFinder:
117118
candidate_covs = [
118119
cov_name
119120
for cov_name in data.covs.keys()
120-
if cov_name not in settings["cov_finder"]["pre_selected_covs"] + ["intercept"]
121+
if cov_name
122+
not in settings["cov_finder"]["pre_selected_covs"] + ["intercept"]
121123
]
122124
settings["cov_finder"] = {
123125
**dict(
@@ -140,7 +142,9 @@ def get_cov_finder(settings: dict, cov_finder_linear_model: MRBRT) -> CovFinder:
140142
return cov_finder
141143

142144

143-
def get_cov_finder_result(cov_finder_linear_model: MRBRT, cov_finder: MRBRT) -> dict:
145+
def get_cov_finder_result(
146+
cov_finder_linear_model: MRBRT, cov_finder: MRBRT
147+
) -> dict:
144148
"""Summarize result from bias covariate selection.
145149
146150
Parameters
@@ -158,7 +162,9 @@ def get_cov_finder_result(cov_finder_linear_model: MRBRT, cov_finder: MRBRT) ->
158162
"""
159163
beta_info = get_beta_info(cov_finder_linear_model, cov_name="intercept")
160164
selected_covs = [
161-
cov_name for cov_name in cov_finder.selected_covs if cov_name != "intercept"
165+
cov_name
166+
for cov_name in cov_finder.selected_covs
167+
if cov_name != "intercept"
162168
]
163169

164170
# save results
@@ -201,7 +207,8 @@ def get_linear_model(df: DataFrame, cov_finder_result: dict) -> MRBRT:
201207
for cov_name in cov_finder_result["selected_covs"]:
202208
cov_models.append(
203209
LinearCovModel(
204-
cov_name, prior_beta_gaussian=[0.0, cov_finder_result["beta_sd"]]
210+
cov_name,
211+
prior_beta_gaussian=[0.0, cov_finder_result["beta_sd"]],
205212
)
206213
)
207214
model = MRBRT(data, cov_models)
@@ -247,7 +254,7 @@ def get_linear_model_summary(
247254
else:
248255
score = float(0.5 * sign * burden_of_proof)
249256
summary["score"] = score
250-
#Assign star rating based on ROS
257+
# Assign star rating based on ROS
251258
if np.isnan(score):
252259
summary["star_rating"] = 0
253260
elif score > np.log(1 + 0.85):
@@ -297,12 +304,18 @@ def get_draws(
297304
beta_info = summary["beta"]
298305
gamma_info = summary["gamma"]
299306
inner_beta_sd = beta_info[1]
300-
outer_beta_sd = np.sqrt(beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1])
307+
outer_beta_sd = np.sqrt(
308+
beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1]
309+
)
301310
inner_beta_samples = np.random.normal(
302-
loc=beta_info[0], scale=inner_beta_sd, size=settings["draws"]["num_draws"]
311+
loc=beta_info[0],
312+
scale=inner_beta_sd,
313+
size=settings["draws"]["num_draws"],
303314
)
304315
outer_beta_samples = np.random.normal(
305-
loc=beta_info[0], scale=outer_beta_sd, size=settings["draws"]["num_draws"]
316+
loc=beta_info[0],
317+
scale=outer_beta_sd,
318+
size=settings["draws"]["num_draws"],
306319
)
307320
df_inner_draws = pd.DataFrame(
308321
inner_beta_samples[None, :],
@@ -338,7 +351,9 @@ def get_quantiles(
338351
beta_info = summary["beta"]
339352
gamma_info = summary["gamma"]
340353
inner_beta_sd = beta_info[1]
341-
outer_beta_sd = np.sqrt(beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1])
354+
outer_beta_sd = np.sqrt(
355+
beta_info[1] ** 2 + gamma_info[0] + 2 * gamma_info[1]
356+
)
342357
inner_beta_quantiles = norm.ppf(
343358
settings["draws"]["quantiles"], loc=beta_info[0], scale=inner_beta_sd
344359
)
@@ -410,11 +425,19 @@ def _plot_funnel(summary: dict, df: DataFrame, ax: Axes) -> Axes:
410425
beta, gamma = summary["beta"], summary["gamma"]
411426
beta_inner_sd = beta[1]
412427
beta_outer_sd = np.sqrt(beta[1] ** 2 + gamma[0] + 2.0 * gamma[1])
413-
beta_inner = [beta[0] - 1.96 * beta_inner_sd, beta[0] + 1.96 * beta_inner_sd]
414-
beta_outer = [beta[0] - 1.96 * beta_outer_sd, beta[0] + 1.96 * beta_outer_sd]
428+
beta_inner = [
429+
beta[0] - 1.96 * beta_inner_sd,
430+
beta[0] + 1.96 * beta_inner_sd,
431+
]
432+
beta_outer = [
433+
beta[0] - 1.96 * beta_outer_sd,
434+
beta[0] + 1.96 * beta_outer_sd,
435+
]
415436

416437
# plot data
417-
ax.scatter(df.ln_rr, df.ln_rr_se, color="#008080", alpha=0.4, edgecolor="none")
438+
ax.scatter(
439+
df.ln_rr, df.ln_rr_se, color="#008080", alpha=0.4, edgecolor="none"
440+
)
418441
outlier_index = df.is_outlier == 1
419442
ax.scatter(
420443
df.ln_rr[outlier_index],
@@ -434,10 +457,16 @@ def _plot_funnel(summary: dict, df: DataFrame, ax: Axes) -> Axes:
434457
alpha=0.2,
435458
)
436459
ax.plot(
437-
[beta[0], beta[0] - 1.96 * se_max], [0.0, se_max], linewidth=1, color="gray"
460+
[beta[0], beta[0] - 1.96 * se_max],
461+
[0.0, se_max],
462+
linewidth=1,
463+
color="gray",
438464
)
439465
ax.plot(
440-
[beta[0], beta[0] + 1.96 * se_max], [0.0, se_max], linewidth=1, color="gray"
466+
[beta[0], beta[0] + 1.96 * se_max],
467+
[0.0, se_max],
468+
linewidth=1,
469+
color="gray",
441470
)
442471
ax.set_ylim([se_max, 0.0])
443472

tests/test_dummy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
def test_dummy():
2-
assert True
2+
assert True

0 commit comments

Comments
 (0)