From 12f26c9e42e1deb069b5741e48b55d8c5872af37 Mon Sep 17 00:00:00 2001 From: Alex Dundore Date: Mon, 7 Apr 2025 19:39:17 -0500 Subject: [PATCH 1/2] feat: added Weibull distribution --- ngboost/distns/__init__.py | 2 ++ ngboost/distns/weibull.py | 65 ++++++++++++++++++++++++++++++++++++++ tests/test_distns.py | 2 ++ tests/test_score.py | 2 ++ 4 files changed, 71 insertions(+) create mode 100644 ngboost/distns/weibull.py diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py index 05e94128..8642410e 100644 --- a/ngboost/distns/__init__.py +++ b/ngboost/distns/__init__.py @@ -10,6 +10,7 @@ from .normal import Normal, NormalFixedMean, NormalFixedVar from .poisson import Poisson from .t import T, TFixedDf, TFixedDfFixedVar +from .weibull import Weibull __all__ = [ "Bernoulli", @@ -30,4 +31,5 @@ "T", "TFixedDf", "TFixedDfFixedVar", + "Weibull" ] diff --git a/ngboost/distns/weibull.py b/ngboost/distns/weibull.py new file mode 100644 index 00000000..457d104d --- /dev/null +++ b/ngboost/distns/weibull.py @@ -0,0 +1,65 @@ +"""The NGBoost Weibull distribution and scores""" +import numpy as np +from scipy.stats import weibull_min as dist + +from ngboost.distns.distn import RegressionDistn +from ngboost.scores import LogScore + + +class WeibullLogScore(LogScore): + def score(self, Y): + return -self.dist.logpdf(Y) + + def d_score(self, Y): + D = np.zeros((len(Y), 2)) + shared_term = self.shape * ((Y / self.scale)**self.shape - 1) + D[:, 0] = shared_term * np.log(Y / self.scale) - 1 + D[:, 1] = -shared_term + + return D + + def metric(self): + gamma = 0.5772156649 # Euler's constant + FI = np.zeros((self.scale.shape[0], 2, 2)) + FI[:, 0, 0] = (np.pi**2 / 6) + (1 - gamma)**2 + FI[:, 1, 0] = -self.shape * (1 - gamma) + FI[:, 0, 1] = FI[:, 1, 0] + FI[:, 1, 1] = self.shape**2 + + return FI + + +class Weibull(RegressionDistn): + """ + Implements the Weibull distribution for NGBoost. + + The Weibull distribution has two parameters, shape and scale. 
+ The scipy loc parameter is held constant for this implementation. + LogScore is supported for the Weibull distribution. + """ + + n_params = 2 + scores = [WeibullLogScore] + + # pylint: disable=super-init-not-called + def __init__(self, params): + self._params = params + self.shape = np.exp(params[0]) # shape (c) + self.scale = np.exp(params[1]) # scale (lambda) + self.dist = dist(c=self.shape, loc=0, scale=self.scale) + + def fit(Y): + shape, _loc, scale = dist.fit(Y, floc=0) # hold loc constant + return np.array([np.log(shape), np.log(scale)]) + + def sample(self, m): + return np.array([self.dist.rvs() for i in range(m)]) + + def __getattr__(self, name): + if name in dir(self.dist): + return getattr(self.dist, name) + return None + + @property + def params(self): + return {'shape': self.shape, 'scale': self.scale} diff --git a/tests/test_distns.py b/tests/test_distns.py index 3aff1b9e..258040be 100644 --- a/tests/test_distns.py +++ b/tests/test_distns.py @@ -22,6 +22,7 @@ T, TFixedDf, TFixedDfFixedVar, k_categorical, + Weibull ) from ngboost.scores import CRPScore, LogScore, Score @@ -74,6 +75,7 @@ def is_t_distribution( TFixedDf, TFixedDfFixedVar, Cauchy, + Weibull ], ) @pytest.mark.parametrize( diff --git a/tests/test_score.py b/tests/test_score.py index 3d291a5a..1969f74a 100644 --- a/tests/test_score.py +++ b/tests/test_score.py @@ -16,6 +16,7 @@ T, TFixedDf, TFixedDfFixedVar, + Weibull ) from ngboost.manifold import manifold from ngboost.scores import CRPScore, LogScore, Score @@ -100,6 +101,7 @@ def idfn(dist_score: DistScore): (Laplace, LogScore), (Poisson, LogScore), (Gamma, LogScore), + (Weibull, LogScore) ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)] # Fill in the dist, score pair to test the gradient # Tests all in TEST_METRIC by default From 267865225321eb2bc28adaac2b4ff51ead929163 Mon Sep 17 00:00:00 2001 From: Alex Dundore Date: Mon, 7 Apr 2025 19:58:42 -0500 Subject: [PATCH 2/2] fix: black and isort formatters ---
ngboost/distns/__init__.py | 2 +- ngboost/distns/weibull.py | 6 +++--- tests/test_distns.py | 4 ++-- tests/test_score.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py index 8642410e..6b645707 100644 --- a/ngboost/distns/__init__.py +++ b/ngboost/distns/__init__.py @@ -31,5 +31,5 @@ "T", "TFixedDf", "TFixedDfFixedVar", - "Weibull" + "Weibull", ] diff --git a/ngboost/distns/weibull.py b/ngboost/distns/weibull.py index 457d104d..582811a9 100644 --- a/ngboost/distns/weibull.py +++ b/ngboost/distns/weibull.py @@ -12,7 +12,7 @@ def score(self, Y): def d_score(self, Y): D = np.zeros((len(Y), 2)) - shared_term = self.shape * ((Y / self.scale)**self.shape - 1) + shared_term = self.shape * ((Y / self.scale) ** self.shape - 1) D[:, 0] = shared_term * np.log(Y / self.scale) - 1 D[:, 1] = -shared_term @@ -21,7 +21,7 @@ def d_score(self, Y): def metric(self): gamma = 0.5772156649 # Euler's constant FI = np.zeros((self.scale.shape[0], 2, 2)) - FI[:, 0, 0] = (np.pi**2 / 6) + (1 - gamma)**2 + FI[:, 0, 0] = (np.pi**2 / 6) + (1 - gamma) ** 2 FI[:, 1, 0] = -self.shape * (1 - gamma) FI[:, 0, 1] = FI[:, 1, 0] FI[:, 1, 1] = self.shape**2 @@ -62,4 +62,4 @@ def __getattr__(self, name): @property def params(self): - return {'shape': self.shape, 'scale': self.scale} + return {"shape": self.shape, "scale": self.scale} diff --git a/tests/test_distns.py b/tests/test_distns.py index 258040be..c697fbf9 100644 --- a/tests/test_distns.py +++ b/tests/test_distns.py @@ -21,8 +21,8 @@ T, TFixedDf, TFixedDfFixedVar, + Weibull, k_categorical, - Weibull ) from ngboost.scores import CRPScore, LogScore, Score @@ -75,7 +75,7 @@ def is_t_distribution( TFixedDf, TFixedDfFixedVar, Cauchy, - Weibull + Weibull, ], ) @pytest.mark.parametrize( diff --git a/tests/test_score.py b/tests/test_score.py index 1969f74a..c50d7b60 100644 --- a/tests/test_score.py +++ b/tests/test_score.py @@ -16,7 +16,7 @@ T, TFixedDf, TFixedDfFixedVar, - 
Weibull + Weibull, ) from ngboost.manifold import manifold from ngboost.scores import CRPScore, LogScore, Score @@ -101,7 +101,7 @@ def idfn(dist_score: DistScore): (Laplace, LogScore), (Poisson, LogScore), (Gamma, LogScore), - (Weibull, LogScore) + (Weibull, LogScore), ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)] # Fill in the dist, score pair to test the gradient # Tests all in TEST_METRIC by default