ihmeuw-msca
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 17 additions & 0 deletions b/‎.pre-commit-config.yaml
Lines changed: 17 additions & 0 deletions
diff --git a/‎data/categorical/README.md
Lines changed: 22 additions & 0 deletions b/‎data/categorical/README.md
Lines changed: 22 additions & 0 deletions
diff --git a/‎data/categorical/settings.yaml
Lines changed: 41 additions & 0 deletions b/‎data/categorical/settings.yaml
Lines changed: 41 additions & 0 deletions
diff --git a/‎data/categorical/wash_water-diarrhea.csv
Lines changed: 81 additions & 0 deletions b/‎data/categorical/wash_water-diarrhea.csv
Lines changed: 81 additions & 0 deletions
diff --git a/‎pyproject.toml
Lines changed: 5 additions & 4 deletions b/‎pyproject.toml
Lines changed: 5 additions & 4 deletions
diff --git a/‎ruff.toml
Lines changed: 8 additions & 0 deletions b/‎ruff.toml
Lines changed: 8 additions & 0 deletions
diff --git a/‎sphinx/api_reference/categorical_pipeline.rst
Lines changed: 20 additions & 0 deletions b/‎sphinx/api_reference/categorical_pipeline.rst
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/bopforge/categorical_pipeline/__init__.py b/‎src/bopforge/categorical_pipeline/__init__.py
@@ -189,4 +189,7 @@ result/
 # vscode
 .vscode/
 
+# MacOS DS Store files
+.DS_Store
+
 results/
@@ -0,0 +1,17 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.4.2
+  hooks:
+    - id: ruff
+      args: [ --fix ]
+    - id: ruff-format
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.6.0
+  hooks:
+    - id: trailing-whitespace
+    - id: end-of-file-fixer
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.10.0
+  hooks:
+    - id: mypy
+      files: ^src
@@ -0,0 +1,22 @@
+# Data Format
+
+The data file needs to be stored under the name `{rei}-{acause}.csv`.
+This name will be used throughout the process.
+
+**Columns of the data frame**
+* `seq`: unique row id
+* `study_id`: usually equivalent to `nid`, with minor exceptions
+* `risk_type`: type of the risk, continuous, dichotomous, categorical, etc
+* `ln_rr`: mean of log relative risk
+* `ln_rr_se`: standard error of the log relative risk
+* `ref_risk_cat`: reference exposure category of the risk factor
+* `alt_risk_cat`: alternative exposure category of the risk factor
+* `cov_{name}`: bias covariates, usually related to study design
+
+
+**Settings**
+
+The `settings.yaml` file contains the settings for the actions in the process. The main actions are:
+* `fit_signal_model`: fit signal model
+* `select_bias_covs`: select bias covariates
+* `fit_linear_model`: fit final linear model
@@ -0,0 +1,41 @@
+default:
+  # random seed
+  seed: 0
+  # order for clearly ordinal categories
+  cat_order: []
+  cov_type:
+    # Specify covariates by type: bias (binary) and model (interacted and non-interacted)
+    bias_covs: []
+    interacted_covs: []
+    non_interacted_covs: []
+  fit_signal_model:
+    # signal model settings, contains configurations for the exposure categories
+    cat_cov_model:
+      # reference exposure category, default is most common category in the data
+      ref_cat:
+      # priors for categories (in increasing order)
+      # e.g., [risk_cat_1, risk_cat_2] corresponds to the logRR for risk_cat_1 <= risk_cat_2
+      # complete or partial orderings may be provided
+      prior_order: []
+    signal_model:
+      # trimming settings: the inlier percentage defaults to 90%; to disable trimming, use 1.0
+      inlier_pct: 0.9
+  select_bias_covs:
+    cov_finder:
+      # please put the pre-selected bias covariates in a list
+      pre_selected_covs: []
+  complete_summary:
+    # draws configuration
+    draws:
+      num_draws: 1000
+      # additional quantiles, estimated accurately rather than relying on the draws
+      quantiles: [0.025, 0.05, 0.5, 0.95, 0.975]
+    score:
+      # if we normalize the risk 'curve' to the lowest log RR
+      # only for j-shaped this can be true
+      # NOTE: shape will be dependent on plotting order (i.e., could artificially end up with a j-shaped curve,
+      # as risk exposure categories do not increase in a fixed order, unlike continuous risk exposure values)
+      normalize_to_tmrel: false
+  figure:
+    # show line connecting alternative and reference datapoints
+    show_ref: true
@@ -0,0 +1,81 @@
+seq,study_id,risk_type,ln_rr,ln_rr_se,ref_risk_cat,alt_risk_cat,cov_dummy
+0,93649,categorical,-0.357753711,0.088960128,unimproved,improved,0
+1,94512,categorical,-0.162518906,0.142761757,unimproved,solar,0
+2,111111,categorical,-1.56861592,0.667572011,piped,hq_piped,0
+3,116812,categorical,-0.287682072,0.034064764,unimproved,improved,0
+4,191572,categorical,-0.287682072,0.164257146,unimproved,improved,0
+5,191579,categorical,0.039220713,0.226110888,unimproved,piped,0
+6,191586,categorical,-0.248461398,0.148969044,unimproved,solar,0
+7,191801,categorical,-0.653926506,0.252201429,unimproved,filter,0
+9,191811,categorical,-0.46203546,0.614148812,improved,filter,0
+10,191811,categorical,-0.994252273,0.67865009,unimproved,filter,0
+11,193694,categorical,-0.036663995,0.054962366,unimproved,improved,0
+12,193701,categorical,-0.776528768,0.277094312,unimproved,solar,1
+13,193702,categorical,-0.616186102,0.163396749,unimproved,solar,0
+14,193703,categorical,-0.494296305,0.270635568,unimproved,improved,0
+15,193703,categorical,-0.494296305,0.270635568,unimproved,solar,0
+16,193728,categorical,-0.597836983,0.349056377,unimproved,filter,0
+17,193729,categorical,-0.755022584,0.165315975,improved,filter,0
+18,193730,categorical,-1.171182982,0.147471549,unimproved,solar,0
+19,193731,categorical,-0.293791819,0.067961316,unimproved,solar,0
+20,193732,categorical,-0.891598119,0.337997407,unimproved,solar,0
+21,193733,categorical,-0.030459177,0.112102077,unimproved,solar,0
+22,193734,categorical,-0.843970047,0.303932648,unimproved,piped,0
+23,193735,categorical,-0.994252273,0.128549606,unimproved,solar,0
+24,193736,categorical,-1.145703896,0.429935705,improved,hq_piped,0
+25,193737,categorical,-0.713349867,0.369118779,unimproved,filter,0
+26,193738,categorical,-0.597836983,0.189437785,unimproved,filter,0
+27,193739,categorical,-0.356674958,0.090123944,unimproved,solar,0
+28,193740,categorical,-0.494296305,0.029067948,unimproved,piped,0
+29,193741,categorical,0.122217633,0.067273797,unimproved,solar,0
+30,193742,categorical,-0.415515399,0.141299819,unimproved,solar,0
+31,193743,categorical,-0.415515399,0.141299819,unimproved,solar,0
+32,193744,categorical,-0.371063681,0.044478722,unimproved,solar,0
+33,193745,categorical,-0.446287118,0.148764549,unimproved,solar,0
+34,193746,categorical,-1.832581464,0.607038234,unimproved,solar,0
+35,193747,categorical,-0.094310647,0.180781736,unimproved,solar,0
+36,193748,categorical,-0.634878329,0.353653029,improved,piped,0
+37,193749,categorical,-0.457424867,0.17512395,unimproved,solar,0
+38,193750,categorical,-1.560647796,0.280263394,unimproved,filter,0
+39,193751,categorical,-0.056935116,0.125713839,unimproved,solar,0
+40,193751,categorical,-0.056935116,0.125713839,unimproved,improved,1
+41,193752,categorical,-0.105360538,0.290076399,unimproved,solar,0
+42,193753,categorical,-0.597836983,0.296212753,unimproved,solar,0
+43,193754,categorical,-0.287682072,0.145137252,improved,piped,0
+44,193755,categorical,-0.653926506,0.280263373,unimproved,solar,0
+45,193756,categorical,0.292669614,0.149948333,unimproved,piped,0
+46,193757,categorical,-0.248461398,0.032750886,unimproved,solar,0
+47,193758,categorical,-1.347073686,0.648666565,unimproved,filter,0
+48,193759,categorical,-0.941608565,0.340567751,unimproved,filter,0
+49,193760,categorical,-0.187211557,0.037168916,unimproved,piped,0
+50,193761,categorical,0.031193388,0.246379896,unimproved,solar,0
+51,193762,categorical,0.036405408,0.407646154,unimproved,improved,0
+52,193763,categorical,-0.010050326,0.080132943,unimproved,solar,0
+53,193764,categorical,-0.494296305,0.131421797,unimproved,filter,0
+54,193765,categorical,-0.527632793,0.2161514,improved,piped,0
+55,193766,categorical,-0.174353423,0.14096308,improved,piped,0
+56,193766,categorical,0.131028262,0.163571224,unimproved,piped,1
+57,193767,categorical,-0.494296305,0.156170756,unimproved,solar,0
+58,193768,categorical,-0.301105079,0.116089406,unimproved,piped,0
+59,193769,categorical,-1.660731207,0.578756432,unimproved,filter,0
+60,193769,categorical,-0.51082559,0.477509324,improved,filter,0
+61,193769,categorical,0.31481074,0.954946777,unimproved,filter,0
+62,193770,categorical,-1.272965676,0.264797633,unimproved,filter,0
+63,193771,categorical,-0.713349888,0.366605381,unimproved,filter,0
+64,193772,categorical,-0.235722308,0.08472957,unimproved,solar,0
+65,193773,categorical,-1.108662594,0.280263382,unimproved,solar,0
+66,193774,categorical,-0.061875404,0.029728563,unimproved,improved,0
+67,193775,categorical,-0.798507718,0.276971645,unimproved,filter,0
+68,222222,categorical,-2.772588722,0.597610582,piped,hq_piped,0
+69,268285,categorical,-0.223143539,0.232749551,unimproved,solar,0
+70,319502,categorical,-0.843970047,0.064856845,unimproved,solar,0
+71,333333,categorical,-0.376686124,0.947036574,piped,hq_piped,0
+72,334739,categorical,-1.560647796,0.083803584,unimproved,filter,0
+73,334743,categorical,-0.162518906,0.204375142,improved,filter,0
+74,334747,categorical,-0.162518906,0.176826501,unimproved,filter,0
+75,334751,categorical,-0.287682072,0.114530213,unimproved,filter,0
+76,334755,categorical,-1.560647796,0.16817797,unimproved,filter,0
+77,334755,categorical,-1.309333283,0.103436885,unimproved,filter,0
+78,354306,categorical,0.019802627,0.054697401,unimproved,solar,1
+79,357179,categorical,0.009950331,0.853402562,unimproved,solar,0
+80,357201,categorical,-0.248461398,0.032750886,unimproved,solar,0
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "bopforge"
-version = "0.0.6"
+version = "0.1.0"
 description = "Pipelines for Burden of Proof (BoP) analyses"
 readme = "README.md"
 requires-python = ">=3.10"
 license = { file = "LICENSE" }
 authors = [
     { name = "IHME Math Sciences", email = "ihme.math.sciences@gmail.com" },
 ]
-dependencies = ["numpy", "scipy", "pandas", "matplotlib", "mrtool==0.1.4", "pplkit"]
+dependencies = ["numpy<2.0.0", "scipy>=1.14.1", "pandas", "matplotlib", "mrtool>=0.2.1", "pplkit"]
 
 [project.optional-dependencies]
 test = ["pytest"]
@@ -24,9 +24,10 @@ github = "https://github.com/ihmeuw-msca/bopforge"
 [project.scripts]
 continuous_pipeline = "bopforge.continuous_pipeline.__main__:main"
 dichotomous_pipeline = "bopforge.dichotomous_pipeline.__main__:main"
+categorical_pipeline = "bopforge.categorical_pipeline.__main__:main"
 
 [tool.sphinx]
-project = "modrover"
+project = "bopforge"
 author = "IHME Math Sciences"
 copyright = "2024, IHME Math Sciences"
-version = "0.0.6"
+version = "0.1.0"
@@ -0,0 +1,8 @@
+line-length = 80
+src = ["src"]
+
+[format]
+docstring-code-format = true
+
+[lint.pydocstyle]
+convention = "numpy"
@@ -0,0 +1,20 @@
+Categorical Pipeline
+====================
+
+Actions
+-------
+
+.. autofunction:: bopforge.categorical_pipeline.__main__.pre_processing
+
+.. autofunction:: bopforge.categorical_pipeline.__main__.fit_signal_model
+
+.. autofunction:: bopforge.categorical_pipeline.__main__.select_bias_covs
+
+.. autofunction:: bopforge.categorical_pipeline.__main__.fit_linear_model
+
+
+Functions
+---------
+
+.. automodule:: bopforge.categorical_pipeline.functions
+    :members: