|
| 1 | +"""The NGBoost Beta distribution and scores""" |
| 2 | +from scipy.stats import beta as dist |
| 3 | +from scipy.special import digamma, polygamma |
| 4 | +import numpy as np |
| 5 | + |
| 6 | +from ngboost.distns.distn import RegressionDistn |
| 7 | +from ngboost.scores import LogScore |
| 8 | + |
class BetaLogScore(LogScore):
    """Negative log-likelihood scoring rule for the Beta distribution.

    The methods read ``self.a``, ``self.b`` and ``self.dist``. Gradients and
    the metric are taken with respect to the internal parameters
    ``log(a)`` and ``log(b)``, which is why each derivative carries a factor
    of ``a`` or ``b`` (chain rule).
    """

    def score(self, Y):
        """Return the negative log-density of ``Y`` under ``self.dist``."""
        return -self.dist.logpdf(Y)

    def d_score(self, Y):
        """Return the gradient of the score for each observation.

        The result has shape ``(len(Y), 2)``: column 0 is dS/d(log a) and
        column 1 is dS/d(log b).
        """
        grad = np.zeros((len(Y), 2))
        a, b = self.a, self.b
        # digamma(a + b) appears in both partial derivatives.
        psi_sum = digamma(a + b)
        grad[:, 0] = -a * (psi_sum - digamma(a) + np.log(Y))
        grad[:, 1] = -b * (psi_sum - digamma(b) + np.log(1 - Y))
        return grad

    def metric(self):
        """Return the Fisher information matrices in (log a, log b) coordinates.

        The result has shape ``(self.a.shape[0], 2, 2)``; the off-diagonal
        entries are equal, so each 2x2 matrix is symmetric.
        """
        fisher = np.zeros((self.a.shape[0], 2, 2))
        a, b = self.a, self.b
        # polygamma(1, .) is the trigamma function.
        trigamma_sum = polygamma(1, a + b)
        off_diagonal = -a * b * trigamma_sum
        fisher[:, 0, 0] = a**2 * (polygamma(1, a) - trigamma_sum)
        fisher[:, 0, 1] = off_diagonal
        fisher[:, 1, 0] = off_diagonal
        fisher[:, 1, 1] = b**2 * (polygamma(1, b) - trigamma_sum)
        return fisher
| 31 | + |
class Beta(RegressionDistn):
    """
    Implements the Beta distribution for NGBoost.

    The Beta distribution has two shape parameters, a and b. They are
    handled internally on the log scale: ``params[0]`` is log(a) and
    ``params[1]`` is log(b).
    The scipy loc and scale parameters are held constant for this
    implementation.
    LogScore is supported for the Beta distribution.
    """

    n_params = 2
    scores = [BetaLogScore]  # scoring rules available for this distribution

    def __init__(self, params):
        """Build the distribution from its internal parameters.

        Args:
            params: indexable whose element 0 is log(a) and element 1 is
                log(b).
        """
        self._params = params

        # Keep both the log-scale and natural-scale parameters around.
        self.log_a = params[0]
        self.log_b = params[1]
        self.a = np.exp(self.log_a)
        self.b = np.exp(self.log_b)
        # Frozen scipy distribution; loc and scale are left at scipy's defaults.
        self.dist = dist(a=self.a, b=self.b)

    @staticmethod
    def fit(Y):
        """Fit a single Beta distribution to ``Y``.

        Args:
            Y: observed values (presumably expected to lie strictly inside
                (0, 1) -- confirm against scipy's ``beta.fit``).

        Returns:
            A length-2 array ``[log(a), log(b)]``.
        """
        # loc and scale are pinned, so only the two shape parameters are fitted.
        a, b, _loc, _scale = dist.fit(Y, floc=0, fscale=1)
        return np.array([np.log(a), np.log(b)])

    def sample(self, m):
        """Return ``m`` draws; each entry is one ``self.dist.rvs()`` call."""
        return np.array([self.dist.rvs() for _ in range(m)])

    def __getattr__(self, name):
        """Delegate unknown attributes to the frozen scipy distribution.

        This gives access to e.g. ``mean()``, which
        ``RegressionDistn.predict()`` requires.

        Raises:
            AttributeError: if ``dist`` has not been set on the instance yet,
                or if the scipy distribution has no such attribute.
        """
        # Read ``dist`` from the instance dict directly: going through
        # ``self.dist`` would re-enter __getattr__ and recurse without bound
        # when the instance is not initialised yet (e.g. during unpickling
        # or copying).
        frozen = self.__dict__.get("dist")
        if frozen is not None and hasattr(frozen, name):
            return getattr(frozen, name)
        # Raise instead of returning None so that hasattr() and
        # getattr(obj, name, default) behave correctly and typos fail loudly.
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(type(self).__name__, name)
        )

    @property
    def params(self):
        """Return the parameters of the Beta distribution."""
        return {'a': self.a, 'b': self.b}
0 commit comments