"""
This module implements sample poisoning attacks on training datasets,
including applying noise to tensors and modifying dataset samples to
simulate data poisoning.

The SamplePoisoningAttack class provides:
- apply_noise: Applies noise to a tensor based on the specified noise type and poisoning ratio.
- datapoison: Adds noise to a specified portion of a dataset for data poisoning purposes.
- add_x_to_image: Adds an 'X'-style mark to the top-left corner of an image.
- poison_to_nlp_rawdata: Poisons NLP data by setting word vectors to zero with a given probability.
"""

import copy
import random

import numpy as np
import torch
from skimage.util import random_noise

from nebula.addons.attacks.dataset.datasetattack import DatasetAttack


class SamplePoisoningAttack(DatasetAttack):
    """
    Implements a data poisoning attack on a training dataset.

    This attack introduces noise or modifies specific data points to influence
    the behavior of a machine learning model.

    Args:
        engine (object): The training engine object, including the associated
            datamodule.
        attack_params (dict): Attack parameters including:
            - poisoned_percent (float): Fraction of the selected samples to poison (0 <= value <= 1).
            - poisoned_ratio (float): Noise intensity (amount or variance) passed to the noise function.
            - targeted (bool): Whether the attack targets a specific label.
            - target_label (int): The target label for the attack (used if targeted is True).
            - noise_type (str): The type of noise to introduce during the attack.
            - round_start_attack (int): Round at which the attack starts.
            - round_stop_attack (int): Round at which the attack stops.
    """
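
    # Illustrative example of the attack_params dict expected by __init__
    # (the keys come from the code above; the values are placeholders chosen
    # for this sketch, not defaults taken from the framework):
    #
    # attack_params = {
    #     "poisoned_percent": 0.3,      # poison 30% of the node's training samples
    #     "poisoned_ratio": 0.05,       # noise amount/variance passed to random_noise
    #     "targeted": False,            # untargeted: add noise to random samples
    #     "target_label": 3,            # only used when "targeted" is True
    #     "noise_type": "salt",         # "salt", "gaussian", "s&p", or "nlp_rawdata"
    #     "round_start_attack": 1,
    #     "round_stop_attack": 10,
    # }
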
    def __init__(self, engine, attack_params):
        """
        Initializes the SamplePoisoningAttack with the specified engine and parameters.

        Args:
            engine (object): The training engine object.
            attack_params (dict): Dictionary of attack parameters.
        """
        super().__init__(engine)
        self.datamodule = engine._trainer.datamodule
        self.poisoned_percent = float(attack_params["poisoned_percent"])
        self.poisoned_ratio = float(attack_params["poisoned_ratio"])
        self.targeted = attack_params["targeted"]
        self.target_label = int(attack_params["target_label"])
        self.noise_type = attack_params["noise_type"]
        self.round_start_attack = int(attack_params["round_start_attack"])
        self.round_stop_attack = int(attack_params["round_stop_attack"])

    def apply_noise(self, t, noise_type, poisoned_ratio):
        """
        Applies noise to a tensor based on the specified noise type and poisoning ratio.

        Args:
            t (torch.Tensor): The input tensor to which noise will be applied.
            noise_type (str): The type of noise to apply. Supported types are:
                - "salt": Salt noise (salt-and-pepper noise with only 'salt').
                - "gaussian": Gaussian noise with mean 0 and the specified variance.
                - "s&p": Salt-and-pepper noise.
                - "nlp_rawdata": Applies the custom NLP raw-data poisoning function.
            poisoned_ratio (float): The amount or variance of noise to apply, depending on the noise type.

        Returns:
            torch.Tensor: The tensor with noise applied. If the noise type is not
                supported, an error message is printed and the original tensor is
                returned unchanged.

        Notes:
            - The "nlp_rawdata" noise type uses the custom `poison_to_nlp_rawdata` method.
            - Noise for the "salt", "gaussian", and "s&p" types is generated with
              `random_noise` from `skimage.util` and returned as a `torch.Tensor`.
        """
        if noise_type == "salt":
            return torch.tensor(random_noise(t, mode=noise_type, amount=poisoned_ratio))
        elif noise_type == "gaussian":
            return torch.tensor(random_noise(t, mode=noise_type, mean=0, var=poisoned_ratio, clip=True))
        elif noise_type == "s&p":
            return torch.tensor(random_noise(t, mode=noise_type, amount=poisoned_ratio))
        elif noise_type == "nlp_rawdata":
            return self.poison_to_nlp_rawdata(t, poisoned_ratio)
        else:
            print("ERROR: poison attack type not supported.")
            return t

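    # Example (illustrative): given a normalized 28x28 grayscale sample `img`
    # from the training set, salt noise sets roughly 5% of its pixels to the
    # maximum value:
    #
    #     noisy = attack.apply_noise(img, "salt", 0.05)
    #
    # where `attack` is an already-constructed SamplePoisoningAttack instance.
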
    def datapoison(
        self,
        dataset,
        indices,
        poisoned_percent,
        poisoned_ratio,
        targeted=False,
        target_label=3,
        noise_type="salt",
    ):
        """
        Adds noise to a specified portion of a dataset for data poisoning purposes.

        This function applies noise to selected samples within a dataset. Poisoning
        can be targeted or non-targeted. In non-targeted poisoning, random samples
        are chosen and altered using the specified noise type and ratio. In targeted
        poisoning, only samples with the specified label are altered, by adding an
        'X' pattern.

        Args:
            dataset (Dataset): The dataset to poison, expected to have `.data` and `.targets` attributes.
            indices (list of int): The list of indices in the dataset to consider for poisoning.
            poisoned_percent (float): The fraction of `indices` to poison (0 <= poisoned_percent <= 1).
            poisoned_ratio (float): The intensity or probability parameter for the noise, depending on the noise type.
            targeted (bool, optional): If True, applies targeted poisoning by adding an 'X' only to samples with `target_label`.
                Default is False.
            target_label (int, optional): The label to target when `targeted` is True. Default is 3.
            noise_type (str, optional): The type of noise to apply in non-targeted poisoning. Supported types are:
                - "salt": Applies salt noise.
                - "gaussian": Applies Gaussian noise.
                - "s&p": Applies salt-and-pepper noise.
                Default is "salt".

        Returns:
            Dataset: A deep copy of the original dataset with poisoned samples in `.data`.

        Notes:
            - Non-targeted poisoning randomly selects samples from `indices` based on `poisoned_percent`.
            - Targeted poisoning modifies only samples with `target_label` by adding an 'X' pattern, regardless of `poisoned_ratio`.
            - If `indices` is empty, or the number of samples to poison exceeds the number of
              available indices, the dataset copy is returned unchanged.
        """
        new_dataset = copy.deepcopy(dataset)
        train_data = new_dataset.data
        targets = new_dataset.targets
        num_indices = len(indices)
        if not isinstance(noise_type, str):
            noise_type = noise_type[0]

        if not targeted:
            num_poisoned = int(poisoned_percent * num_indices)
            if num_indices == 0:
                return new_dataset
            if num_poisoned > num_indices:
                return new_dataset
            poisoned_indice = random.sample(indices, num_poisoned)

            for i in poisoned_indice:
                t = train_data[i]
                poisoned = self.apply_noise(t, noise_type, poisoned_ratio)
                train_data[i] = poisoned
        else:
            for i in indices:
                if int(targets[i]) == int(target_label):
                    t = train_data[i]
                    poisoned = self.add_x_to_image(t)
                    train_data[i] = poisoned
        new_dataset.data = train_data
        return new_dataset

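    # Example (illustrative): poison 20% of a train set with salt-and-pepper
    # noise of amount 0.1, leaving labels untouched:
    #
    #     poisoned_set = attack.datapoison(
    #         train_set, list(range(len(train_set))), 0.2, 0.1, noise_type="s&p"
    #     )
    #
    # With targeted=True the same call would instead stamp the 'X' pattern on
    # every sample whose label equals target_label.
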
    def add_x_to_image(self, img):
        """
        Adds a 10x10 pixel 'X'-style mark to the top-left corner of an image.

        This function modifies the top-left 10x10 region of the input image by
        setting to 255 (white) every pixel that lies on the main diagonal
        (i == j) or on or above the secondary diagonal (i + j <= 9).

        Args:
            img (array-like): A 2D array or image tensor representing pixel values.
                It is expected to be grayscale, where each pixel has a single
                intensity value.

        Returns:
            torch.Tensor: A tensor representation of the modified image with the mark.
        """
        for i in range(0, 10):
            for j in range(0, 10):
                if i + j <= 9 or i == j:
                    img[i][j] = 255
        return torch.tensor(img)

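    # For intuition, a scaled-down 5x5 analog of the rule used above
    # (mark when i + j <= 4 or i == j; '#' = set to 255, '.' = unchanged):
    #
    #     # # # # #
    #     # # # # .
    #     # # # . .
    #     # # . # .
    #     # . . . #
    #
    # i.e. the marked region is the upper-left triangle plus the tail of the
    # main diagonal, rather than a symmetric 'X'.
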
    def poison_to_nlp_rawdata(self, text_data, poisoned_ratio):
        """
        Poisons NLP data by setting word vectors to zero with a given probability.

        This function randomly selects a portion of the non-zero word vectors in
        the input text data and replaces them with zero vectors, based on the
        specified poisoning ratio. This simulates a form of data corruption by
        partially nullifying the information in the input data.

        Args:
            text_data (list of torch.Tensor): A list where each entry is a tensor
                representing a word vector. Non-zero vectors are assumed to represent valid words.
            poisoned_ratio (float): The fraction of non-zero word vectors to set to zero,
                where 0 <= poisoned_ratio <= 1.

        Returns:
            list of torch.Tensor: The modified text data with some word vectors set to zero.

        Notes:
            - `poisoned_ratio` controls the fraction of non-zero vectors to poison.
            - If the resulting number of vectors to poison is zero, or exceeds the number
              of non-zero vectors, the function returns the original `text_data` unmodified.
        """
        non_zero_vector_indice = [i for i in range(0, len(text_data)) if text_data[i][0] != 0]
        non_zero_vector_len = len(non_zero_vector_indice)

        num_poisoned_token = int(poisoned_ratio * non_zero_vector_len)
        if num_poisoned_token == 0:
            return text_data
        if num_poisoned_token > non_zero_vector_len:
            return text_data

        poisoned_token_indice = random.sample(non_zero_vector_indice, num_poisoned_token)
        zero_vector = torch.Tensor(np.zeros(len(text_data[0][0])))
        for i in poisoned_token_indice:
            text_data[i] = zero_vector
        return text_data

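    # Example (illustrative): for a padded sequence of 20 word vectors in which
    # only the first 10 are non-zero, a poisoned_ratio of 0.5 zeroes out 5 of
    # those 10 vectors, chosen at random; the zero padding vectors are left alone.
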
    def get_malicious_dataset(self):
        """
        Generates a poisoned dataset based on the specified parameters.

        Returns:
            Dataset: A modified version of the training dataset with poisoned data.
        """
        return self.datapoison(
            self.datamodule.train_set,
            self.datamodule.train_set_indices,
            self.poisoned_percent,
            self.poisoned_ratio,
            self.targeted,
            self.target_label,
            self.noise_type,
        )
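

# ---------------------------------------------------------------------------
# Minimal standalone sketch (illustrative only, not part of the attack class):
# it mirrors the skimage calls used in SamplePoisoningAttack.apply_noise on a
# dummy grayscale image, so the effect of poisoned_ratio can be inspected
# without constructing an engine or datamodule.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    dummy_img = rng.random((28, 28))  # stand-in for a normalized grayscale sample

    # Salt noise: roughly `amount` (here 5%) of the pixels are set to 1.0.
    salted = torch.tensor(random_noise(dummy_img, mode="salt", amount=0.05))

    # Gaussian noise: zero-mean noise with the given variance, clipped to [0, 1].
    gaussian = torch.tensor(random_noise(dummy_img, mode="gaussian", mean=0, var=0.05, clip=True))

    print("salted pixels changed:", int((salted != torch.tensor(dummy_img)).sum()))
    print("gaussian max deviation:", float((gaussian - torch.tensor(dummy_img)).abs().max()))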