
Add negative binomial distribution #356

Open
wants to merge 10 commits into master
2 changes: 2 additions & 0 deletions ngboost/distns/__init__.py
@@ -7,6 +7,7 @@
from .laplace import Laplace
from .lognormal import LogNormal
from .multivariate_normal import MultivariateNormal
from .negative_binomial import NegativeBinomial
from .normal import Normal, NormalFixedMean, NormalFixedVar
from .poisson import Poisson
from .t import T, TFixedDf, TFixedDfFixedVar
@@ -23,6 +24,7 @@
"Laplace",
"LogNormal",
"MultivariateNormal",
"NegativeBinomial",
"Normal",
"NormalFixedMean",
"NormalFixedVar",
78 changes: 78 additions & 0 deletions ngboost/distns/negative_binomial.py
@@ -0,0 +1,78 @@
"""The NGBoost NegativeBinomial distribution and scores"""
import numpy as np
from scipy.optimize import Bounds, minimize
from scipy.special import digamma
from scipy.stats import nbinom as dist

from ngboost.distns.distn import RegressionDistn
from ngboost.scores import LogScore


# helper function because scipy doesn't provide a fit function natively
def negative_log_likelihood(params, k):
    return -dist.logpmf(k=k, n=params[0], p=params[1]).sum()


class NegativeBinomialLogScore(LogScore):
    def score(self, Y):
        return -self.dist.logpmf(Y)

    def d_score(self, Y):
        D = np.zeros((len(Y), 2))
        D[:, 0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n))
        D[:, 1] = (Y * np.exp(self.z) - self.n) / (np.exp(self.z) + 1)
        return D

    def metric(self):
        FI = np.zeros((self.n.shape[0], 2, 2))
        FI[:, 0, 0] = (self.n * self.p) / (self.p + 1)
        FI[:, 1, 0] = (self.p - 1) * self.n
        FI[:, 0, 1] = (self.p - 1) * self.n
        FI[:, 1, 1] = self.n * (1 - self.p)
        return FI


class NegativeBinomial(RegressionDistn):

    n_params = 2
    scores = [NegativeBinomialLogScore]

    # pylint: disable=super-init-not-called
    def __init__(self, params):
        # save the parameters
        self._params = params

        self.logn = params[0]
        self.n = np.exp(self.logn)
        # z = log(p/(1-p)) => p = 1/(1 + e^(-z))
        self.z = params[1]
        self.p = 1 / (1 + np.exp(-self.z))
        self.dist = dist(n=self.n, p=self.p)

    def fit(Y):
        assert np.equal(
            np.mod(Y, 1), 0
        ).all(), "All Negative Binomial target data must be discrete integers"
        assert np.all([y >= 0 for y in Y]), "Count data must be >= 0"

        m = minimize(
            negative_log_likelihood,
            x0=np.array([np.max(Y), 0.5]),  # initialized value
            args=(Y,),
            bounds=Bounds((1e-8, 1e-8), (np.inf, 1 - 1e-8)),
        )
        return np.array([np.log(m.x[0]), np.log(m.x[1] / (1 - m.x[1]))])

    def sample(self, m):
        return np.array([self.dist.rvs() for i in range(m)])

    def __getattr__(
        self, name
    ):  # gives us access to NegativeBinomial.mean() required for RegressionDist.predict()
        if name in dir(self.dist):
            return getattr(self.dist, name)
        return None

    @property
    def params(self):
        return {"n": self.n, "p": self.p}
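For reference (not part of the diff): the two columns of D in d_score above are the derivatives of the negative log-likelihood with respect to log(n) and z = log(p / (1 - p)). A minimal sketch, assuming only numpy and scipy, that checks those formulas against a central finite difference of scipy.stats.nbinom.logpmf; the helper neg_logpmf and the evaluation point are illustrative, not part of the PR:

import numpy as np
from scipy.special import digamma
from scipy.stats import nbinom

def neg_logpmf(logn, z, y):
    # negative log-likelihood in the (log(n), z) parameterization used above
    n = np.exp(logn)
    p = 1.0 / (1.0 + np.exp(-z))
    return -nbinom.logpmf(k=y, n=n, p=p)

y, logn, z = 7, 1.3, -0.4
n, p = np.exp(logn), 1.0 / (1.0 + np.exp(-z))

# analytic gradients, matching D[:, 0] and D[:, 1] in d_score
g_logn = -n * (digamma(y + n) + np.log(p) - digamma(n))
g_z = (y * np.exp(z) - n) / (np.exp(z) + 1.0)

# central finite differences of the same negative log-likelihood
eps = 1e-6
fd_logn = (neg_logpmf(logn + eps, z, y) - neg_logpmf(logn - eps, z, y)) / (2 * eps)
fd_z = (neg_logpmf(logn, z + eps, y) - neg_logpmf(logn, z - eps, y)) / (2 * eps)

print(np.allclose([g_logn, g_z], [fd_logn, fd_z]))  # expected: True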
2 changes: 2 additions & 0 deletions tests/test_score.py
@@ -11,6 +11,7 @@
    Gamma,
    Laplace,
    MultivariateNormal,
    NegativeBinomial,
    Normal,
    Poisson,
    T,
@@ -100,6 +101,7 @@ def idfn(dist_score: DistScore):
    (Laplace, LogScore),
    (Poisson, LogScore),
    (Gamma, LogScore),
    (NegativeBinomial, LogScore),
] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]
# Fill in the dist, score pair to test the gradient
# Tests all in TEST_METRIC by default
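End to end (not part of the diff), the new distribution plugs into NGBRegressor the same way as the existing Poisson count distribution. A minimal usage sketch, assuming the public NGBRegressor API; the synthetic data and hyperparameters are illustrative only:

import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import NegativeBinomial
from ngboost.scores import LogScore

# synthetic over-dispersed count data (illustrative only)
rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
Y = rng.negative_binomial(n=5, p=0.4, size=500)

ngb = NGBRegressor(Dist=NegativeBinomial, Score=LogScore, n_estimators=200, verbose=False)
ngb.fit(X, Y)

dist_pred = ngb.pred_dist(X[:5])
print(dist_pred.params)    # per-observation {"n": ..., "p": ...}
print(ngb.predict(X[:5]))  # point predictions via the delegated mean()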