from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

# CuPy is optional: if it cannot be imported, the model falls back to CPU/NumPy.
try:
    import cupy as cp
except Exception:
    cp = None

class LogisticMatrixFactorization:
    def __init__(self, k=30, l2_p=1e-6, epochs=1000, learning_rate=0.001, tolerance=1e-4, device="cpu", random_state=None):
        """
        Logistic Matrix Factorization with a mask.

        Parameters:
        - k: Number of latent factors.
        - l2_p: Regularization parameter (L2 penalty).
        - epochs: Number of training epochs.
        - learning_rate: Learning rate for gradient descent.
        - tolerance: Early-stopping threshold on the change in loss between epochs (None disables early stopping).
        - device: "cpu", "gpu" (CUDA:0), or a non-negative GPU index.
        - random_state: Seed for the random initialization of the embeddings and biases.
        """
        self.k = k
        self.l2_p = l2_p
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.tolerance = tolerance
        self.np = np
        self.random_state = random_state

        if device == "cpu":
            self.device = device
        elif device == "gpu":
            self.device = 0
        elif isinstance(device, int) and device >= 0:
            self.device = device
        else:
            raise ValueError("Device should be 'cpu', 'gpu' (CUDA:0), or a GPU index between 0 and N-1, where N is the number of GPUs.")

        if self.device != "cpu" and cp is None:
            print("No CUDA found! Using CPU!")
            self.device = "cpu"

    def fit(self, Xtrain, MASK, plot_loss=True):
        """
        Train the logistic matrix factorization model.

        Parameters:
        - Xtrain: Training interaction matrix (m x n).
        - MASK: Binary mask matrix with 1s for observed entries in Xtrain.
        - plot_loss: If True, plot the training loss curve after fitting.

        Returns:
        - W: Learned row (user) latent feature matrix (m x k).
        - H: Learned column (item) latent feature matrix (k x n).
        - row_bias: Learned row bias vector (m x 1).
        - col_bias: Learned column bias vector (1 x n).
        - losses: Array of training losses per epoch.
        """
        if self.device != "cpu":
            self.np = cp

        m, n = Xtrain.shape
        W, H, row_bias, col_bias = self._initialize_embeddings(m, n)

        if self.device != "cpu":
            with cp.cuda.Device(self.device):
                losses = cp.zeros(self.epochs)
                MASK = cp.array(MASK)
                Xtrain = cp.array(Xtrain)
                W, H, row_bias, col_bias, losses = self._factorization_routine(W, H, row_bias, col_bias, MASK, Xtrain, losses)

                # to CPU
                W = cp.asnumpy(W)
                H = cp.asnumpy(H)
                row_bias = cp.asnumpy(row_bias)
                col_bias = cp.asnumpy(col_bias)
                MASK = cp.asnumpy(MASK)
                Xtrain = cp.asnumpy(Xtrain)
                losses = cp.asnumpy(losses)
            self.np = np
        else:
            losses = np.zeros(self.epochs)
            W, H, row_bias, col_bias, losses = self._factorization_routine(W, H, row_bias, col_bias, MASK, Xtrain, losses)

        # Plot loss
        if plot_loss:
            plt.plot(losses)
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.title('Training Loss')
            plt.show()

        return W, H, row_bias, col_bias, losses

    def predict(self, W, H, row_bias, col_bias):
        """
        Predict all entries in the matrix.

        Parameters:
        - W: Learned row latent feature matrix (m x k).
        - H: Learned column latent feature matrix (k x n).
        - row_bias: Learned row bias vector (m x 1).
        - col_bias: Learned column bias vector (1 x n).

        Returns:
        - Xtilda: Predicted matrix of interaction probabilities.
        """
        return self._sigmoid(self.np.dot(W, H) + row_bias + col_bias)

    def map_probabilities_to_binary(self, Xtilda, threshold=0.5):
        """
        Map probabilities to binary values (0 or 1) using a threshold.

        Parameters:
        - Xtilda: numpy array, predicted probabilities (values in [0, 1]).
        - threshold: float, the cutoff for mapping probabilities to 0 or 1.

        Returns:
        - Xtilda_binary: numpy array, binary Xtilda (0s and 1s).
        """
        return (Xtilda >= threshold).astype(int)

    def _initialize_embeddings(self, m, n):
        """
        Initialize embeddings (W and H) and biases for rows (users) and columns (items).
        """
        np.random.seed(self.random_state)

        W = np.random.normal(scale=0.1, size=(m, self.k))
        H = np.random.normal(scale=0.1, size=(self.k, n))
        row_bias = np.random.normal(scale=0.1, size=(m, 1))
        col_bias = np.random.normal(scale=0.1, size=(1, n))

        if self.device != "cpu":
            with cp.cuda.Device(self.device):
                W, H, row_bias, col_bias = cp.array(W), cp.array(H), cp.array(row_bias), cp.array(col_bias)

        return W, H, row_bias, col_bias

    def _sigmoid(self, x):
        # Clip the logits to avoid overflow warnings in exp for large-magnitude inputs.
        x = self.np.clip(x, -30.0, 30.0)
        return 1 / (1 + self.np.exp(-x))

    def _compute_loss(self, X_train, Xtilda, MASK, W, H):
        """
        Compute the masked binary cross-entropy loss with L2 regularization.

        Parameters:
        - X_train: Training interaction matrix.
        - Xtilda: Predicted matrix.
        - MASK: Binary mask matrix.
        - W: Current row latent feature matrix.
        - H: Current column latent feature matrix.

        Returns:
        - loss: Binary cross-entropy over observed entries plus the L2 penalty on W and H.
        """
        loss = -self.np.sum(
            MASK * (X_train * self.np.log(Xtilda + 1e-8) + (1 - X_train) * self.np.log(1 - Xtilda + 1e-8))
        )
        loss += self.l2_p * (self.np.sum(W ** 2) + self.np.sum(H ** 2))
        return loss
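
    # Note on the gradients used in _factorization_routine (a standard identity, spelled
    # out here for clarity): for a sigmoid output p = sigmoid(z), the derivative of the
    # binary cross-entropy -[x*log(p) + (1 - x)*log(1 - p)] with respect to the logit z
    # is simply (p - x). Masking the unobserved entries therefore gives
    # errors = MASK * (Xtilda - Xtrain), which is the term the gradient computations
    # below build on (plus the L2 regularization terms).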

    def _factorization_routine(self, W, H, row_bias, col_bias, MASK, Xtrain, losses):
        """
        Perform masked matrix factorization using full-batch gradient descent with L2
        regularization and optional early stopping.

        This method iteratively optimizes the latent factor matrices (`W` and `H`), the row
        biases, and the column biases to minimize the reconstruction error between the observed
        entries of the input matrix (`Xtrain`) and the predicted matrix (`Xtilda`). It applies
        L2 regularization and stops early if the loss improvement falls below the configured
        tolerance.

        Parameters:
            W (numpy.ndarray):
                Matrix of shape `(num_rows, latent_factors)` with the initial latent factors for rows.
            H (numpy.ndarray):
                Matrix of shape `(latent_factors, num_columns)` with the initial latent factors for columns.
            row_bias (numpy.ndarray):
                Vector of shape `(num_rows, 1)` holding the row-wise biases.
            col_bias (numpy.ndarray):
                Vector of shape `(1, num_columns)` holding the column-wise biases.
            MASK (numpy.ndarray):
                Binary mask of the same shape as `Xtrain`, where 1 marks an observed entry and 0 a missing one.
            Xtrain (numpy.ndarray):
                Observed training data matrix of shape `(num_rows, num_columns)`.
            losses (numpy.ndarray):
                Pre-allocated array that stores the loss value at each epoch.

        Returns:
            W (numpy.ndarray): Updated latent factor matrix for rows.
            H (numpy.ndarray): Updated latent factor matrix for columns.
            row_bias (numpy.ndarray): Updated row-wise biases.
            col_bias (numpy.ndarray): Updated column-wise biases.
            losses (numpy.ndarray): Training loss recorded at each epoch.

        Steps:
            1. Prediction: compute `Xtilda` from the current `W`, `H`, `row_bias`, and `col_bias`.
            2. Error calculation: compute the reconstruction error only for observed entries using `MASK`.
            3. Gradient calculation: compute gradients for `W`, `H`, `row_bias`, and `col_bias` from the
               masked errors and the L2 regularization terms.
            4. Parameter updates: update `W`, `H`, `row_bias`, and `col_bias` using the gradients and the
               configured learning rate.
            5. Loss calculation: compute the loss for the current epoch and store it in `losses`.
            6. Early stopping: terminate if the loss change between consecutive epochs falls below the tolerance.

        Notes:
            - `_compute_loss` combines the masked reconstruction error with the regularization terms.
            - Early stopping can significantly reduce computation time when the optimization converges quickly.
            - The input arrays are updated in place; the returned values reflect the final state after optimization.
        """
        for epoch in tqdm(range(self.epochs)):
            # Compute Xtilda (predictions)
            Xtilda = self.predict(W, H, row_bias=row_bias, col_bias=col_bias)

            # Compute errors for observed entries
            errors = MASK * (Xtilda - Xtrain)

            # Gradients
            grad_W = self.np.dot(errors, H.T) + self.l2_p * W
            grad_H = self.np.dot(W.T, errors) + self.l2_p * H
            grad_row_bias = self.np.sum(errors, axis=1, keepdims=True) + self.l2_p * row_bias
            grad_col_bias = self.np.sum(errors, axis=0, keepdims=True) + self.l2_p * col_bias

            # Update embeddings and biases
            W -= self.learning_rate * grad_W
            H -= self.learning_rate * grad_H
            row_bias -= self.learning_rate * grad_row_bias
            col_bias -= self.learning_rate * grad_col_bias

            # Compute training loss
            loss = self._compute_loss(Xtrain, Xtilda, MASK, W, H)
            losses[epoch] = loss

            # Early stopping based on tolerance
            if self.tolerance is not None and epoch > 0 and abs(losses[epoch] - losses[epoch - 1]) < self.tolerance:
                print(f"Early stopping at epoch {epoch + 1}. Loss change below tolerance.")
                break

        return W, H, row_bias, col_bias, losses
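

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): the toy matrix, mask, and
# hyperparameters below are arbitrary assumptions chosen to show the API
# (fit -> predict -> map_probabilities_to_binary), not recommended settings.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Toy 20 x 15 binary interaction matrix; roughly 30% of its entries are "observed".
    X = (rng.random((20, 15)) < 0.4).astype(float)
    MASK = (rng.random((20, 15)) < 0.3).astype(float)

    model = LogisticMatrixFactorization(k=5, epochs=200, learning_rate=0.01,
                                        device="cpu", random_state=42)
    W, H, row_bias, col_bias, losses = model.fit(X, MASK, plot_loss=False)

    # Interaction probabilities for every entry, then a thresholded 0/1 prediction matrix.
    probs = model.predict(W, H, row_bias, col_bias)
    preds = model.map_probabilities_to_binary(probs, threshold=0.5)
    print("Predicted matrix shape:", preds.shape)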