From a38f00bafffec8303b6fcc9cafabd753fe1acfed Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 3 Sep 2024 14:48:29 -0500 Subject: [PATCH 1/4] initialize equity metric --- pyincore/analyses/equitymetric/__init__.py | 8 + .../analyses/equitymetric/equitymetric.py | 137 ++++++++++++++++++ .../analyses/equitymetric/equitymetricutil.py | 57 ++++++++ 3 files changed, 202 insertions(+) create mode 100644 pyincore/analyses/equitymetric/__init__.py create mode 100644 pyincore/analyses/equitymetric/equitymetric.py create mode 100644 pyincore/analyses/equitymetric/equitymetricutil.py diff --git a/pyincore/analyses/equitymetric/__init__.py b/pyincore/analyses/equitymetric/__init__.py new file mode 100644 index 00000000..f4051d97 --- /dev/null +++ b/pyincore/analyses/equitymetric/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2024 University of Illinois and others. All rights reserved. +# +# This program and the accompanying materials are made available under the +# terms of the Mozilla Public License v2.0 which accompanies this distribution, +# and is available at https://www.mozilla.org/en-US/MPL/2.0/ + +from pyincore.analyses.equitymetric.equitymetric import EquityMetric +from pyincore.analyses.equitymetric.equitymetricutil import EquityMetricUtil diff --git a/pyincore/analyses/equitymetric/equitymetric.py b/pyincore/analyses/equitymetric/equitymetric.py new file mode 100644 index 00000000..51af5a76 --- /dev/null +++ b/pyincore/analyses/equitymetric/equitymetric.py @@ -0,0 +1,137 @@ +# Copyright (c) 2024 University of Illinois and others. All rights reserved. +# +# This program and the accompanying materials are made available under the +# terms of the Mozilla Public License v2.0 which accompanies this distribution, +# and is available at https://www.mozilla.org/en-US/MPL/2.0/ + +import numpy as np +from pyincore import BaseAnalysis + + +class EquityMetric(BaseAnalysis): + """Computes electric power infrastructure functionality. + Args: + incore_client: Service client with authentication info + """ + + def __init__(self, incore_client): + super(EquityMetric, self).__init__(incore_client) + + def run(self): + """Execute equity metric analysis""" + + division_decision_column = self.get_parameter("division_decision_column") + scarce_resource_df = self.get_input_dataset( + "scarce_resource" + ).get_dataframe_from_csv() + hua_df = self.get_input_dataset( + "housing_unit_allocation" + ).get_dataframe_from_csv() + merged_df = hua_df.merge( + scarce_resource_df, how="inner", left_on="guid", right_on="guid" + ) + + equity_metric = self.equity_metric(merged_df, division_decision_column) + + self.set_result_csv_data( + "equity_metric", + equity_metric, + name=self.get_parameter("result_name") + "_equity_metric", + source="dataframe", + ) + + return True + + def equity_metric(self, merged_gdf, division_decision_column): + """ + Compute equity metric + Args: + merged_gdf: Merging housing unit allocation and scarce resource to create geopands dataframes + division_decision_column: column name of the division decision variable e.g. SVI + + Returns: + equity_metric: equity metric values that consist of Theil’s T Value, Between Zone Inequality, Within Zone Inequality + + """ + # Calculation of households in each group + total_1 = merged_gdf[merged_gdf[division_decision_column] > 0].shape[ + 0 + ] # socially vulnerable populations + total_2 = merged_gdf[merged_gdf[division_decision_column] < 1].shape[ + 0 + ] # non socially vulnerable populations + total_households = ( + total_1 + total_2 + ) # for non-vacant households (i.e., non-vacant are not included) + + # Metric Computation + scarce_resource = merged_gdf["scarce_resource"] + yi = scarce_resource / np.sum(scarce_resource) + Yg_1 = np.sum(yi[merged_gdf[division_decision_column] > 0]) + Yg_2 = np.sum(yi[merged_gdf[division_decision_column] < 1]) + TheilT = np.sum(yi * np.log(yi * total_households)) + bzi = np.sum(yi[merged_gdf[division_decision_column] > 0]) * np.log( + np.average(yi[merged_gdf[division_decision_column] > 0]) / np.average(yi) + ) + np.sum(yi[merged_gdf[division_decision_column] < 1]) * np.log( + np.average(yi[merged_gdf[division_decision_column] < 1]) / np.average(yi) + ) + wzi = Yg_1 * np.sum( + yi[merged_gdf[division_decision_column] > 0] + / Yg_1 + * np.log((yi[merged_gdf[division_decision_column] > 0] / Yg_1 * total_1)) + ) + Yg_2 * np.sum( + yi[merged_gdf[division_decision_column] < 1] + / Yg_2 + * np.log((yi[merged_gdf[division_decision_column] < 1] / Yg_2 * total_2)) + ) + + return [{"Theils T": TheilT, "BZI": bzi, "WZI": wzi}] + + def get_spec(self): + """Get specifications of the Equity Metric analysis. + Returns: + obj: A JSON object of specifications of the Equity Metric analysis. + """ + return { + "name": "equity-metric", + "description": "Equity metric analysis", + "input_parameters": [ + { + "id": "result_name", + "required": True, + "description": "result dataset name", + "type": str, + }, + { + "id": "division_decision_column", + "required": True, + "description": "Division decision. " + "Binary variable associated with each household used to group it into two groups " + "(e.g. low income vs non low income, minority vs non-minority, " + "social vulnerability)", + "type": str, + }, + ], + "input_datasets": [ + { + "id": "housing_unit_allocation", + "required": True, + "description": "A csv file with the merged dataset of the inputs, aka Probabilistic" + "House Unit Allocation", + "type": ["incore:housingUnitAllocation"], + }, + { + "id": "scarce_resource", + "required": True, + "description": "Scarce resource dataset e.g. probability of service, return time, etc", + "type": ["incore:housingRecoveryHistory"], + }, + ], + "output_datasets": [ + { + "id": "equity_metric", + "description": "CSV file of equity metric, including Theil’s T Value, Between Zone Inequality, Within Zone Inequality", + "type": "incore:equityMetric", + } + ], + } diff --git a/pyincore/analyses/equitymetric/equitymetricutil.py b/pyincore/analyses/equitymetric/equitymetricutil.py new file mode 100644 index 00000000..a69db320 --- /dev/null +++ b/pyincore/analyses/equitymetric/equitymetricutil.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024 University of Illinois and others. All rights reserved. +# +# This program and the accompanying materials are made available under the +# terms of the Mozilla Public License v2.0 which accompanies this distribution, +# and is available at https://www.mozilla.org/en-US/MPL/2.0/ + +import pandas as pd + + +class EquityMetricUtil: + @staticmethod + def prepare_svi_as_division_decision(merged_gdf): + """ + socially vulnerability as division decision variable which is a binary variable associated with each household + used to group it into two groups + Args: + merged_gdf: + + Returns: + + """ + # Add variable to indicate if high socially vulnerability for metric's computation + median_income = merged_gdf["randincome"].median() + + condition1 = merged_gdf["randincome"] <= median_income + condition2 = merged_gdf["ownershp"] == 2 + condition3 = merged_gdf["race"] != 1 + condition4 = merged_gdf["hispan"] != 0 + + merged_gdf["SVI"] = condition1 & condition2 & condition3 & condition4 + merged_gdf["SVI"] = (merged_gdf["SVI"]).astype(int) + + return merged_gdf + + @staticmethod + def prepare_return_time_as_scarce_resrouce(return_df): + return_sequence = return_df.iloc[:, 4:94] + # add return time to the scarce resource dataset + time_to_return = EquityMetricUtil.time_to_return(return_sequence) + return_df["Return Time"] = pd.to_numeric(time_to_return) + return_df["scarce_resource"] = 91 - return_df["Return Time"] + + return return_df + + @staticmethod + def time_to_return(return_sequence): + # now create a for loop to determine the time for each row + time_to_return = [] + for i in range(0, return_sequence.shape[0]): + if max(return_sequence.iloc[i]) == 4: + column_index = (return_sequence == 4).idxmax(axis=1)[i] + else: + # assuming for 5 that it is never recovered, so we set it to max time interval of 90 + column_index = 90 + time_to_return.append(column_index) + + return time_to_return From 1aee25b0b9eb64c4f7e42598363a89adfc5f55d5 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 3 Sep 2024 15:57:24 -0500 Subject: [PATCH 2/4] update test --- CHANGELOG.md | 1 + .../analyses/equitymetric/equitymetric.py | 36 ++++++++++--------- .../analyses/equitymetric/equitymetricutil.py | 26 +++++++------- .../equitymetric/test_equitymetric.py | 33 +++++++++++++++++ 4 files changed, 67 insertions(+), 29 deletions(-) create mode 100644 tests/pyincore/analyses/equitymetric/test_equitymetric.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 22bce1e5..1892f547 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Added - Apply Black formatter [#589](https://github.com/IN-CORE/pyincore/issues/589) +- Equity Metric Analysis [#608](https://github.com/IN-CORE/pyincore/issues/608) ## [1.19.0] - 2024-06-12 diff --git a/pyincore/analyses/equitymetric/equitymetric.py b/pyincore/analyses/equitymetric/equitymetric.py index 51af5a76..bb2b8b38 100644 --- a/pyincore/analyses/equitymetric/equitymetric.py +++ b/pyincore/analyses/equitymetric/equitymetric.py @@ -6,6 +6,7 @@ import numpy as np from pyincore import BaseAnalysis +from pyincore.analyses.equitymetric.equitymetricutil import EquityMetricUtil class EquityMetric(BaseAnalysis): @@ -27,6 +28,9 @@ def run(self): hua_df = self.get_input_dataset( "housing_unit_allocation" ).get_dataframe_from_csv() + if division_decision_column == "SVI" and "SVI" not in hua_df.columns: + hua_df = EquityMetricUtil.prepare_svi_as_division_decision(hua_df) + merged_df = hua_df.merge( scarce_resource_df, how="inner", left_on="guid", right_on="guid" ) @@ -42,11 +46,11 @@ def run(self): return True - def equity_metric(self, merged_gdf, division_decision_column): + def equity_metric(self, merged_df, division_decision_column): """ Compute equity metric Args: - merged_gdf: Merging housing unit allocation and scarce resource to create geopands dataframes + merged_df: Merging housing unit allocation and scarce resource to create dataframes division_decision_column: column name of the division decision variable e.g. SVI Returns: @@ -54,10 +58,10 @@ def equity_metric(self, merged_gdf, division_decision_column): """ # Calculation of households in each group - total_1 = merged_gdf[merged_gdf[division_decision_column] > 0].shape[ + total_1 = merged_df[merged_df[division_decision_column] > 0].shape[ 0 ] # socially vulnerable populations - total_2 = merged_gdf[merged_gdf[division_decision_column] < 1].shape[ + total_2 = merged_df[merged_df[division_decision_column] < 1].shape[ 0 ] # non socially vulnerable populations total_households = ( @@ -65,24 +69,24 @@ def equity_metric(self, merged_gdf, division_decision_column): ) # for non-vacant households (i.e., non-vacant are not included) # Metric Computation - scarce_resource = merged_gdf["scarce_resource"] + scarce_resource = merged_df["scarce_resource"] yi = scarce_resource / np.sum(scarce_resource) - Yg_1 = np.sum(yi[merged_gdf[division_decision_column] > 0]) - Yg_2 = np.sum(yi[merged_gdf[division_decision_column] < 1]) + Yg_1 = np.sum(yi[merged_df[division_decision_column] > 0]) + Yg_2 = np.sum(yi[merged_df[division_decision_column] < 1]) TheilT = np.sum(yi * np.log(yi * total_households)) - bzi = np.sum(yi[merged_gdf[division_decision_column] > 0]) * np.log( - np.average(yi[merged_gdf[division_decision_column] > 0]) / np.average(yi) - ) + np.sum(yi[merged_gdf[division_decision_column] < 1]) * np.log( - np.average(yi[merged_gdf[division_decision_column] < 1]) / np.average(yi) + bzi = np.sum(yi[merged_df[division_decision_column] > 0]) * np.log( + np.average(yi[merged_df[division_decision_column] > 0]) / np.average(yi) + ) + np.sum(yi[merged_df[division_decision_column] < 1]) * np.log( + np.average(yi[merged_df[division_decision_column] < 1]) / np.average(yi) ) wzi = Yg_1 * np.sum( - yi[merged_gdf[division_decision_column] > 0] + yi[merged_df[division_decision_column] > 0] / Yg_1 - * np.log((yi[merged_gdf[division_decision_column] > 0] / Yg_1 * total_1)) + * np.log((yi[merged_df[division_decision_column] > 0] / Yg_1 * total_1)) ) + Yg_2 * np.sum( - yi[merged_gdf[division_decision_column] < 1] + yi[merged_df[division_decision_column] < 1] / Yg_2 - * np.log((yi[merged_gdf[division_decision_column] < 1] / Yg_2 * total_2)) + * np.log((yi[merged_df[division_decision_column] < 1] / Yg_2 * total_2)) ) return [{"Theils T": TheilT, "BZI": bzi, "WZI": wzi}] @@ -124,7 +128,7 @@ def get_spec(self): "id": "scarce_resource", "required": True, "description": "Scarce resource dataset e.g. probability of service, return time, etc", - "type": ["incore:housingRecoveryHistory"], + "type": ["incore:scarceResource"], }, ], "output_datasets": [ diff --git a/pyincore/analyses/equitymetric/equitymetricutil.py b/pyincore/analyses/equitymetric/equitymetricutil.py index a69db320..62381537 100644 --- a/pyincore/analyses/equitymetric/equitymetricutil.py +++ b/pyincore/analyses/equitymetric/equitymetricutil.py @@ -9,41 +9,41 @@ class EquityMetricUtil: @staticmethod - def prepare_svi_as_division_decision(merged_gdf): + def prepare_svi_as_division_decision(hua_df): """ socially vulnerability as division decision variable which is a binary variable associated with each household used to group it into two groups Args: - merged_gdf: + hua_df: Returns: """ # Add variable to indicate if high socially vulnerability for metric's computation - median_income = merged_gdf["randincome"].median() + median_income = hua_df["randincome"].median() - condition1 = merged_gdf["randincome"] <= median_income - condition2 = merged_gdf["ownershp"] == 2 - condition3 = merged_gdf["race"] != 1 - condition4 = merged_gdf["hispan"] != 0 + condition1 = hua_df["randincome"] <= median_income + condition2 = hua_df["ownershp"] == 2 + condition3 = hua_df["race"] != 1 + condition4 = hua_df["hispan"] != 0 - merged_gdf["SVI"] = condition1 & condition2 & condition3 & condition4 - merged_gdf["SVI"] = (merged_gdf["SVI"]).astype(int) + hua_df["SVI"] = condition1 & condition2 & condition3 & condition4 + hua_df["SVI"] = (hua_df["SVI"]).astype(int) - return merged_gdf + return hua_df @staticmethod - def prepare_return_time_as_scarce_resrouce(return_df): + def prepare_return_time_as_scarce_resource(return_df): return_sequence = return_df.iloc[:, 4:94] # add return time to the scarce resource dataset - time_to_return = EquityMetricUtil.time_to_return(return_sequence) + time_to_return = EquityMetricUtil._time_to_return(return_sequence) return_df["Return Time"] = pd.to_numeric(time_to_return) return_df["scarce_resource"] = 91 - return_df["Return Time"] return return_df @staticmethod - def time_to_return(return_sequence): + def _time_to_return(return_sequence): # now create a for loop to determine the time for each row time_to_return = [] for i in range(0, return_sequence.shape[0]): diff --git a/tests/pyincore/analyses/equitymetric/test_equitymetric.py b/tests/pyincore/analyses/equitymetric/test_equitymetric.py new file mode 100644 index 00000000..5d1694c6 --- /dev/null +++ b/tests/pyincore/analyses/equitymetric/test_equitymetric.py @@ -0,0 +1,33 @@ +from pyincore import IncoreClient, Dataset, DataService +from pyincore.analyses.equitymetric import EquityMetric +from pyincore.analyses.equitymetric import EquityMetricUtil +import pyincore.globals as pyglobals + + +def run_with_base_class(): + client = IncoreClient(pyglobals.INCORE_API_DEV_URL) + datasvc = DataService(client) + + # prepare input dataset + return_df = Dataset.from_data_service( + "66d7763b43810e1298b0e8b1", datasvc + ).get_dataframe_from_csv() + scarce_resource_df = EquityMetricUtil.prepare_return_time_as_scarce_resource( + return_df + ) + scarce_resource = Dataset.from_dataframe( + scarce_resource_df, "scarce_resource", data_type="incore:scarceResource" + ) + + equity_metric = EquityMetric(client) + equity_metric.set_parameter("result_name", "Galveston_recovery_time") + equity_metric.set_parameter("division_decision_column", "SVI") + equity_metric.load_remote_input_dataset( + "housing_unit_allocation", "66d7770543810e1298b0e8b6" + ) + equity_metric.set_input_dataset("scarce_resource", scarce_resource) + equity_metric.run_analysis() + + +if __name__ == "__main__": + run_with_base_class() From 6d6df7ecdd68dc291addc89a2760c019c9ce551c Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 3 Sep 2024 16:15:21 -0500 Subject: [PATCH 3/4] remove dataframe flag --- pyincore/analyses/equitymetric/equitymetric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyincore/analyses/equitymetric/equitymetric.py b/pyincore/analyses/equitymetric/equitymetric.py index bb2b8b38..f31485b9 100644 --- a/pyincore/analyses/equitymetric/equitymetric.py +++ b/pyincore/analyses/equitymetric/equitymetric.py @@ -41,7 +41,6 @@ def run(self): "equity_metric", equity_metric, name=self.get_parameter("result_name") + "_equity_metric", - source="dataframe", ) return True From e6fad355916349f13c05baa56ee0f7e96fedcadc Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Mon, 7 Oct 2024 21:40:37 -0500 Subject: [PATCH 4/4] fix docstring; add to doc --- docs/source/modules.rst | 7 +++++++ pyincore/analyses/equitymetric/equitymetric.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/modules.rst b/docs/source/modules.rst index 3e9d357e..b053606a 100644 --- a/docs/source/modules.rst +++ b/docs/source/modules.rst @@ -111,6 +111,13 @@ analyses/epnfunctionality .. autoclass:: epnfunctionality.epnfunctionality.EpnFunctionalityUtil :members: +analyses/equitymetric +========================= +.. autoclass:: equitymetric.equitymetric.EquityMetric + :members: +.. autoclass:: equitymetric.equitymetric.EquityMetricUtil + :members: + analyses/example ================ .. autoclass:: example.exampleanalysis.ExampleAnalysis diff --git a/pyincore/analyses/equitymetric/equitymetric.py b/pyincore/analyses/equitymetric/equitymetric.py index f31485b9..8e3e8d62 100644 --- a/pyincore/analyses/equitymetric/equitymetric.py +++ b/pyincore/analyses/equitymetric/equitymetric.py @@ -10,7 +10,7 @@ class EquityMetric(BaseAnalysis): - """Computes electric power infrastructure functionality. + """Computes equity metric. Args: incore_client: Service client with authentication info """