diff --git a/examples/privacy/gradient_leakage_attack/LICENSE.md b/examples/privacy/gradient_leakage_attack/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..c4b49ed8f4ac20f8349dae9fcbc54d72875b2fbc --- /dev/null +++ b/examples/privacy/gradient_leakage_attack/LICENSE.md @@ -0,0 +1,13 @@ +Copyright 2024 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/examples/privacy/gradient_leakage_attack/README.md b/examples/privacy/gradient_leakage_attack/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3f5442880d1bb4250a1f0bcd655913ec60e39e74 --- /dev/null +++ b/examples/privacy/gradient_leakage_attack/README.md @@ -0,0 +1,73 @@ +# 梯度逆向攻击(Gradient Inversion Attack) + +使用 MindSpore 框架实现梯度逆向攻击(威胁模型可见参考文献[1]). + +我们提供了3种梯度逆向攻击算法(InvGrad, SeeThrough, StepWise,算法细节见参考文献[2-4])、3种数据类型以及2种模型组合的攻击实例。 + +同时,使用者可以参考我们提供的攻击测试实例,并基于我们提供的函数,快速扩展出更多的测试案例。 + +除此之外,我们还提供了差分隐私、梯度裁剪等防御方法防御梯度泄露攻击的功能。 + +## 配置 + +### 依赖 + +环境配置请参考 `minimal_environment.yml`。 + +### 硬件 + +我们建议使用GPU来运行这些代码。 + +## 使用 + +请在使用前准备好数据集。 + +### 数据集 + +你可以使用公用的机器学习数据集,如: *CIFAR-100*, *Tiny-ImageNet*. 请在使用前传入正确的数据路径 `--data_path`. 
+
+或者你也可以使用自制的图像数据(224x224 px),请将其放置于文件夹 */custom_data/1_img/* 中。
+
+### 运行攻击
+
+我们提供了用户友好的运行方式。
+如果你想要运行攻击(with default configuration),只需要在终端运行:
+
+```shell
+python main.py
+```
+
+或者你可以传入更多的参数: `--out_put`, `--data_path`, `--dataset`, `--model`, `--alg_name`, `--defense`, `--num_data_points`, `--max_iterations`,
+`--step_size`, `--TV_scale`, `--TV_start`, `--BN_scale`, `--BN_start` and `--callback`.
+
+| argument | description |
+|-----------------|--------------------------------------------------------------------------|
+| out_put | str: 输出路径 |
+| data_path | str: 数据集的路径 |
+| dataset | str: 'TinyImageNet', 'CIFAR100', 'WebImage' |
+| model | str: 'resnet18', 'resnet34' |
+| alg_name | str: 'InvGrad', 'SeeThrough', 'StepWise' |
+| defense | str: 'None', 'Vicinal Augment', 'Differential Privacy', 'Gradient Prune' |
+| num_data_points | int: 同时重构的数据量 |
+| max_iterations | int: 攻击最大迭代次数 |
+| step_size | float: 攻击时的优化步长 |
+| TV_scale | float: Total Variation 正则项权重 |
+| TV_start | int: Total Variation 开始时的步数 |
+| BN_scale | float: Batch Normalization 正则项权重 |
+| BN_start | int: Batch Normalization 开始时的步数 |
+| callback | int: 每经过该步数输出一次攻击结果 |
+
+## 开源协议
+
+请参见文件: `LICENSE.md`.
+
+## 参考文献
+
+[1] Zhu, Ligeng, Zhijian Liu, and Song Han. "Deep leakage from gradients." in NeurIPS, 2019.
+
+[2] Geiping, Jonas, et al. "Inverting gradients-how easy is it to break privacy in federated learning?." in NeurIPS, 2020.
+
+[3] Yin, Hongxu, et al. "See through gradients: Image batch recovery via gradinversion." in CVPR, 2021.
+
+[4] Ye, Zipeng, et al. "High-Fidelity Gradient Inversion in Distributed Learning." in AAAI, 2024.
"""
This module provides configs interfaces.
"""
from breaching import attacks

__all__ = ["attacks"]


import hydra


def get_config(overrides):
    """Compose and return the top-level hydra config for the framework."""
    with hydra.initialize(config_path="config", version_base="1.1"):
        composed = hydra.compose(config_name="cfg", overrides=overrides)
        print(f"Investigating use case {composed.case.name} with server type {composed.case.server.name}.")
        return composed


def get_attack_config(attack="invertinggradients", overrides=None):
    """Compose and return the hydra config for a given attack."""
    with hydra.initialize(config_path="config/attack", version_base="1.1"):
        composed = hydra.compose(config_name=attack, overrides=overrides)
        print(f"Loading attack configuration {composed.attack_type}-{composed.type}.")
        return composed


def get_case_config(case="1_single_image_small", overrides=None):
    """Compose and return the hydra config for a given use case."""
    with hydra.initialize(config_path="config/case", version_base="1.1"):
        composed = hydra.compose(config_name=case, overrides=overrides)
        print(f"Investigating use case {composed.name} with server type {composed.server.name}.")
        return composed
+""" +from .optimization_based_attack import OptimizationBasedAttacker + + +def prepare_attack(model, loss, cfg_attack): + """prepares attack optimization methods.""" + if cfg_attack.attack_type == "optimization": + attacker = OptimizationBasedAttacker(model, loss, cfg_attack) + else: + raise ValueError(f"Invalid type of attack {cfg_attack.attack_type} given.") + + return attacker + + +__all__ = ["prepare_attack"] diff --git a/examples/privacy/gradient_leakage_attack/breaching/attacks/auxiliaries/__init__.py b/examples/privacy/gradient_leakage_attack/breaching/attacks/auxiliaries/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/examples/privacy/gradient_leakage_attack/breaching/attacks/auxiliaries/common.py b/examples/privacy/gradient_leakage_attack/breaching/attacks/auxiliaries/common.py new file mode 100644 index 0000000000000000000000000000000000000000..c9845f4b9b601ffeb16904413fc941d2e5a3ea1b --- /dev/null +++ b/examples/privacy/gradient_leakage_attack/breaching/attacks/auxiliaries/common.py @@ -0,0 +1,40 @@ +"""Some attack optimizers configurations.""" +from mindspore import nn + + +class StepLR: + """step learning rate.""" + def __init__(self, step_size, max_iter, gamma=0.33): + self.max_it = max_iter + self.mile1, self.mile2, self.mile3 = max_iter//2.667, max_iter//1.6, max_iter//1.142 + self.lr0 = step_size + self.lr1 = self.lr0 * gamma + self.lr2 = self.lr1 * gamma + self.lr3 = self.lr2 * gamma + + def __call__(self, cur_step): + if cur_step < self.mile1: + return self.lr0 + if cur_step < self.mile2: + return self.lr1 + if cur_step < self.mile3: + return self.lr2 + return self.lr3 + + +def optimizer_lookup(params, optim_name, step_size, scheduler=None, max_iterations=10_000): + """optimizer configs.""" + if optim_name.lower() == "adam": + optimizer = nn.Adam([params], learning_rate=step_size) + elif optim_name.lower() == "sgd": + optimizer = nn.SGD([params], 
class DeepInversionFeatureHook:
    """Forward hook measuring BatchNorm feature-statistics drift.

    After each forward pass, ``r_feature`` holds the distance between the
    batch statistics of the hooked layer's input and the layer's running
    (moving) statistics, as used by the DeepInversion regularizer.
    """

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.module = module
        self.r_feature = 0

    def hook_fn(self, ids, inputs, output):
        """Recompute the statistics-matching penalty for this forward pass."""
        # Guard kept from the original: `output` is normally a tensor, so
        # this branch only logs the hook id for unexpected int outputs.
        if isinstance(output, int):
            print(ids)
        feat = inputs[0]
        channels = feat.shape[1]
        batch_mean = feat.mean([0, 2, 3])
        per_channel = ms.ops.transpose(feat, (1, 0, 2, 3)).reshape([channels, -1])
        batch_var = per_channel.var(1)
        var_gap = ms.ops.norm(self.module.moving_variance.value() - batch_var, 2)
        mean_gap = ms.ops.norm(self.module.moving_mean.value() - batch_mean, 2)
        self.r_feature = var_gap + mean_gap

    def close(self):
        """Detach the forward hook from the module."""
        self.hook.remove()
class GradientLoss:
    """Base class for gradient-matching objectives.

    Subclasses implement ``gradient_based_loss`` to compare a gradient
    recomputed from a candidate image against the user's shared gradient.
    """

    def __init__(self):
        self.model = None
        self.cfg_impl = None

    def initialize(self, cfg_impl, model):
        """Bind the attacked model and the implementation config."""
        self.model = model
        self.cfg_impl = cfg_impl

    def get_matching_loss(self, gradient_data, candidate, labels):
        """Distance between the candidate's gradient and the captured one."""
        rec_gradient, _ = self._grad_fn_single_step(candidate, labels)
        return self.gradient_based_loss(rec_gradient, gradient_data)

    def gradient_based_loss(self, gradient_rec, gradient_data):
        raise NotImplementedError()

    def _grad_fn_single_step(self, candidate, labels):
        """One forward/backward pass on the candidate data."""
        def _single_forward(data, targets):
            return self._loss_fn(self.model(data), targets)

        grad_op = ops.value_and_grad(_single_forward, None, weights=self.model.trainable_params())
        task_loss, gradient = grad_op(candidate, labels)
        return gradient, task_loss

    def _loss_fn(self, logits, labels):
        """Cross-entropy via explicit log-softmax followed by NLL."""
        # NOTE(review): ops.log(ops.softmax(...)) is less numerically stable
        # than a fused log-softmax; kept as-is to preserve behavior.
        return self._nll_loss(ops.log(ops.softmax(logits)), labels)

    def _nll_loss(self, logs, labels):
        """Mean negative log-likelihood of the given integer labels."""
        picked = [-logs[i, int(j)] for i, j in enumerate(labels)]
        return ops.stack(picked).mean()
class CosineSimilarity(GradientLoss):
    """Gradient-matching objective: scaled (1 - cosine similarity)."""

    def __init__(self, scale=1.0, task_regularization=0.0, **kwargs):
        super().__init__()
        self.tmp = kwargs
        self.scale = scale
        self.task_regularization = task_regularization

    def gradient_based_loss(self, gradient_rec, gradient_data):
        """Scaled cosine distance between the two gradient sets."""
        return self._cosine_sim(gradient_rec, gradient_data) * self.scale

    @staticmethod
    def _cosine_sim(gradient_rec, gradient_data):
        """Cosine distance, accumulated over all parameter tensors at once."""
        dot = ms.Tensor(0.0)
        norm_rec = ms.Tensor(0.0)
        norm_data = ms.Tensor(0.0)

        for rec_layer, data_layer in zip(gradient_rec, gradient_data):
            dot += (rec_layer * data_layer).sum()
            norm_rec += rec_layer.pow(2).sum()
            norm_data += data_layer.pow(2).sum()

        return 1.0 - dot / (norm_rec.sqrt() * norm_data.sqrt())
class TotalVariation(nn.Cell):
    """Total-variation regularizer implemented as a frozen convolution.

    A fixed conv kernel computes horizontal/vertical forward differences of
    the candidate image; their (eps-stabilized) powers are combined and
    averaged, then scaled by ``scale``.
    """
    def __init__(self, scale=0.1, inner_exp=1, outer_exp=1, tv_start=0, double_opponents=False, eps=1e-8):
        # scale: penalty weight; inner_exp/outer_exp: exponents applied before/
        # after summing the two difference directions; tv_start: iteration
        # before which the regularizer is inactive.
        super().__init__()
        self.scale = scale
        self.inner_exp = float(inner_exp)
        self.outer_exp = float(outer_exp)
        self.start = tv_start
        self.eps = eps
        self.double_opponents = double_opponents

        # 3x3 forward-difference kernel; transposed copy gives the other axis.
        grad_weight = ms.Tensor([[0, 0, 0], [0, -1, 1], [0, 0, 0]]).unsqueeze(0).unsqueeze(1)
        grad_weight = ms.ops.concat((ms.ops.transpose(grad_weight, (0, 1, 3, 2)), grad_weight), 0)
        self.groups = 6 if self.double_opponents else 3
        self.weight = ms.ops.concat([grad_weight] * self.groups, 0)
        # NOTE(review): with double_opponents=True, groups becomes 6 but the
        # conv is hard-coded Conv2d(3, 6, ..., group=6) while self.weight has
        # 12 output channels — this path looks inconsistent; only the default
        # (False) appears exercised. TODO confirm.
        self.conv = nn.Conv2d(3, 6, kernel_size=3, stride=1, pad_mode='pad', padding=1, dilation=1, group=self.groups)
        self.conv.trainable_params()[0].set_data(self.weight)

    def initialize(self, models, *args, **kwargs):
        # No model-dependent setup needed for this regularizer.
        pass

    def construct_(self, tensor, *args):
        """Return the TV penalty; args[0] is the current attack iteration."""
        # Inactive before `start`: a large constant is returned instead of the
        # penalty (presumably a sentinel kept out of the gradient — TODO confirm).
        if args[0] < self.start:
            return 100

        diffs = self.conv(tensor)

        # Pair up the two difference directions per color channel.
        squares = (diffs.abs() + self.eps).pow(self.inner_exp)
        squared_sums = (squares[:, 0::2] + squares[:, 1::2]).pow(self.outer_exp)
        return squared_sums.mean() * self.scale
class _BaseAttacker:
    """Common scaffolding for gradient-inversion attackers.

    Rebuilds the attacked model(s) from the server payload, normalizes the
    shared data, and reads or recovers the ground-truth labels. Subclasses
    implement ``reconstruct``.
    """
    def __init__(self, model, loss_fn, cfg_attack):
        self.cfg = cfg_attack
        # Shallow copies: the attacker re-parameterizes them per payload.
        self.model_template = copy.copy(model)
        self.loss_fn = copy.copy(loss_fn)
        self.data_shape = None
        # dm/ds: dataset mean/std used to (de)normalize candidate images.
        self.dm, self.ds = 0, 0
        self._rec_models = None

    def reconstruct(self, server_payload, shared_data, initial_data=None, custom=None):
        """Run the actual reconstruction; implemented by subclasses."""
        raise NotImplementedError()

    def prepare_attack(self, server_payload, shared_data):
        """Rebuild models and labels from payload and shared data.

        Returns:
            (models, labels): one re-parameterized model per payload entry and
            an int32 label tensor (shared by the user, or recovered).
        """
        # Shallow copies so the caller's lists are not mutated.
        shared_data = shared_data.copy()
        server_payload = server_payload.copy()

        metadata = server_payload[0]["metadata"]
        self.data_shape = list(metadata.shape)
        # Normalization constants from metadata when present, identity otherwise.
        if hasattr(metadata, "mean"):
            self.dm = ms.Tensor(list(metadata.mean))[None, :, None, None]
            self.ds = ms.Tensor(list(metadata.std))[None, :, None, None]
        else:
            self.dm, self.ds = ms.Tensor(0), ms.Tensor(1)

        rec_models = self._construct_models_from_payload_and_buffers(server_payload, shared_data)
        shared_data = self._cast_shared_data(shared_data)
        self._rec_models = rec_models

        # Labels: recover them from the gradients unless the user shared them.
        if shared_data[0]["metadata"]["labels"] is None:
            labels = self._recover_label_information(shared_data)
        else:
            labels = copy.copy(shared_data[0]["metadata"]["labels"])
            labels = labels.astype(ms.int32)
        return rec_models, labels

    def _construct_models_from_payload_and_buffers(self, server_payload, shared_data):
        """Instantiate one model per payload, loading parameters and buffers.

        Buffer priority: user-shared buffers, then server-provided buffers;
        with no buffers at all the model is left in training mode so batch
        statistics are recomputed on the fly.
        """
        models = []
        for idx, payload in enumerate(server_payload):

            new_model = copy.copy(self.model_template)

            # Load parameters
            parameters = payload["parameters"]
            if shared_data[idx]["buffers"] is not None:
                buffers = shared_data[idx]["buffers"]
                new_model.set_train(False)
            elif payload["buffers"] is not None:
                buffers = payload["buffers"]
                new_model.set_train(False)
            else:
                new_model.set_train(True)
                buffers = []

            for param, server_state in zip(new_model.trainable_params(), parameters):
                param.set_data(server_state.value())
            for buffer, server_state in zip(new_model.untrainable_params(), buffers):
                buffer.set_data(server_state.value())
            models.append(new_model)
        return models

    def _cast_shared_data(self, shared_data):
        """Re-list gradients/buffers (shallow copy of the tensor lists)."""
        for data in shared_data:
            data["gradients"] = [g for g in data["gradients"]]
            if data["buffers"] is not None:
                data["buffers"] = [b for b in data["buffers"]]
        return shared_data

    def _initialize_data(self, data_shape):
        """Draw the starting candidate according to cfg.init.

        Supported schemes: 'randn', 'randn-trunc' (clipped small noise),
        'rand' (uniform in [-1, 1]), 'zeros'.
        """
        init_type = self.cfg.init
        if init_type == "randn":
            candidate = ops.randn(data_shape)
        elif init_type == "randn-trunc":
            candidate = (ops.randn(data_shape) * 0.1).clamp(-0.1, 0.1)
        elif init_type == "rand":
            candidate = (ops.rand(data_shape) * 2) - 1.0
        elif init_type == "zeros":
            candidate = ops.zeros(data_shape)
        else:
            raise ValueError(f"Unknown initialization scheme {init_type} given.")

        # The candidate itself is the variable being optimized.
        candidate.requires_grad = True
        return candidate

    def _init_optimizer(self, candidate):
        """Build the attack optimizer and LR scheduler from cfg.optim."""
        optimizer, scheduler = optimizer_lookup(
            candidate,
            self.cfg.optim.optimizer,
            self.cfg.optim.step_size,
            scheduler=self.cfg.optim.step_size_decay,
            max_iterations=self.cfg.optim.max_iterations,
        )
        return optimizer, scheduler

    def _recover_label_information(self, user_data):
        """Recover labels from the shared gradients.

        Strategies: 'iDLG' (argmin of the summed last-layer weight gradient,
        single-label heuristic) or 'yin' (smallest per-class minima of the
        last-layer gradient). Missing labels are padded with random classes.
        """
        num_data_points = user_data[0]["metadata"]["num_data_points"]
        num_classes = user_data[0]["gradients"][-1].shape[0]

        if self.cfg.label_strategy is None:
            return None
        if self.cfg.label_strategy == "iDLG":
            label_list = []
            for shared_data in user_data:
                last_weight_min = ms.ops.argmin(ops.sum(shared_data["gradients"][-2], dim=-1), axis=-1)
                label_list += [last_weight_min.detach()]
            labels = ops.stack(label_list)
        elif self.cfg.label_strategy == "yin":
            total_min_vals = ms.Tensor(0)
            for shared_data in user_data:
                total_min_vals += shared_data["gradients"][-2].min(axis=-1)
            labels = total_min_vals.argsort()[:num_data_points]
            print(labels.shape)
        else:
            raise ValueError(f"Invalid label recovery strategy {self.cfg.label_strategy} given.")

        # Pad with random labels if fewer than num_data_points were recovered.
        if len(labels) < num_data_points:
            labels = ms.ops.concat(
                [labels, ms.ops.randint(0, num_classes, (num_data_points-len(labels)))]
            )

        labels = (labels.sort()[0]).astype(ms.int32)
        print(f"Recovered labels {labels}.")
        return labels
    def reconstruct(self, server_payload, shared_data, initial_data=None, custom=None):
        """Reconstruct user data from shared gradients.

        Returns a dict with the best candidate images ('data') and the
        (given or recovered) 'labels'.
        """
        print(f'initialized data: {initial_data}')
        rec_models, labels = self.prepare_attack(server_payload, shared_data)
        candidate_solutions = self._run_trial(rec_models, shared_data, labels, custom)
        reconstructed_data = dict(data=candidate_solutions, labels=labels)

        return reconstructed_data

    def _run_trial(self, rec_model, shared_data, labels, custom=None):
        """Run the optimization loop and return the best candidate found."""
        # Truncate the evaluation log file for this trial.
        open(self.txt_path, 'w', encoding='utf-8')
        # Ground-truth preview saved earlier by the launcher (A_0.png).
        self.gt_img = Image.open(self.cfg.save_dir + 'A_0.png')
        for regularizer in self.regularizers:
            regularizer.initialize(rec_model)
        self.objective.initialize(self.cfg.impl, rec_model[0])
        candidate_all, minimal_list = [], []
        optimizer, scheduler = [], []
        # Single restart (range(1)); structure allows multiple seeds.
        for seed in range(1):
            candidate_tmp = self._initialize_data([shared_data[0]["metadata"]["num_data_points"], *self.data_shape])
            candidate_tmp = ms.Parameter(candidate_tmp)
            candidate_all.append(candidate_tmp)
            minimal_list.append(ms.Tensor(float("inf")))
            best_candidate = ops.randn_like(candidate_tmp)
            opt_tmp, sched_tmp = self._init_optimizer(candidate_all[seed])
            opt_tmp.update_parameters_name('data')
            optimizer.append(opt_tmp)
            scheduler.append(sched_tmp)
        current_wallclock = time.time()
        self.save_flag = 0

        for iteration in range(self.cfg.optim.max_iterations):
            for seed in range(1):
                candidate = candidate_all[seed]
                optim = optimizer[seed]
                grad_fn = ms.value_and_grad(self._forward_total_loss, None, optim.parameters)
                objective_value = self._get_obj_and_step(candidate, shared_data[0]["gradients"],
                                                         labels, iteration, grad_fn, optim, scheduler[seed])
                # Project back into the valid (normalized) pixel box.
                if self.cfg.optim.boxed:
                    max_p = (1 - self.dm) / self.ds
                    min_p = -self.dm / self.ds
                    candidate.set_data(ops.clamp(candidate, min_p, max_p))
                # StepWise-style objectives periodically force a snapshot even
                # if the loss did not improve (layer window just shifted).
                if "peroid_Add10" in self.cfg.objective.keys():
                    if objective_value < minimal_list[seed] or (iteration+1) % self.cfg.objective.peroid_Add10 == 0:
                        minimal_list[seed] = objective_value.item()
                        best_candidate = ms.Tensor(candidate)
                elif objective_value < minimal_list[seed]:
                    minimal_list[seed] = objective_value.item()
                    best_candidate = ms.Tensor(candidate)

                # Periodic logging and intermediate-image dumps.
                if iteration + 1 == self.cfg.optim.max_iterations or iteration % self.cfg.optim.callback == 0:
                    timestamp = time.time()
                    # Keep fractional part plus last digit of the integer part.
                    obj_value = math.modf(float(objective_value))[0] + math.modf(float(objective_value))[1] % 10
                    print(f"{self.save_flag}|| It: {iteration + 1} | Rec. loss: {obj_value:2.4f} | "
                          f"T: {timestamp - current_wallclock:4.2f}s\n")
                    with open(self.txt_path, 'a', encoding='utf-8') as f:
                        f.write(
                            f"{self.save_flag}|| It: {iteration + 1} | Rec. loss: {obj_value:2.4f} | "
                            f"T: {timestamp - current_wallclock:4.2f}s\n"
                        )
                    current_wallclock = timestamp
                    if custom is not None:
                        if "save_dir" not in self.cfg.keys():
                            raise AttributeError('saving path is not given!!!!!!!!')
                        if not os.path.exists(self.cfg.save_dir):
                            os.mkdir(self.cfg.save_dir)
                        save_path = self.cfg.save_dir + f'recon_{iteration + 1}.png'
                        custom.save_recover(best_candidate, save_pth=save_path)
                        self.save_flag += 1
                # Abort on NaN/inf loss.
                if not ms.ops.isfinite(objective_value):
                    print(f"Recovery loss is non-finite in iteration {iteration}, seed {seed}. "
                          f"Cancelling reconstruction!")
                    break
        return best_candidate

    def _get_obj_and_step(self, candidate, shared_grads, labels, iteration, grad_fn, optim, sched):
        """Evaluate the loss, post-process the gradient, and take one step."""
        total_objective, grad = grad_fn(candidate, shared_grads, labels, iteration)
        lr = sched(ms.Tensor(iteration))
        optim.learning_rate.set_data(lr)
        grad = grad[0]
        # Optional Langevin noise on the update.
        if self.cfg.optim.langevin_noise > 0:
            step_size = lr
            noise_map = ops.randn_like(candidate.value())
            grad += self.cfg.optim.langevin_noise * step_size * noise_map
        if self.cfg.optim.grad_clip is not None:
            # NOTE(review): this clips using the norm of the CANDIDATE, not of
            # the gradient — looks like it should be grad.norm(); confirm intent.
            grad_norm = candidate.value().norm()
            if grad_norm > self.cfg.optim.grad_clip:
                grad *= self.cfg.optim.grad_clip / (grad_norm + 1e-8)
        # Optional (soft-)sign descent.
        if self.cfg.optim.signed is not None:
            if self.cfg.optim.signed == "soft":
                scaling_factor = 1 - iteration / self.cfg.optim.max_iterations
                grad = ops.tanh(grad*scaling_factor)/scaling_factor
            elif self.cfg.optim.signed == "hard":
                grad = ops.sign(grad)
            else:
                pass
        grad = tuple([grad])
        optim(grad)
        return total_objective

    def _forward_total_loss(self, candidate, shared_grads, labels, iteration):
        """Gradient-matching objective plus all active regularizers."""
        objective_tmp = self.objective.get_matching_loss(shared_grads, candidate, labels)
        total_objective = 0
        total_objective += objective_tmp
        for regularizer in self.regularizers:
            total_objective += regularizer.construct_(candidate, iteration)
        return total_objective
+""" +import os +import random +import hydra +from PIL import Image +import mindspore as ms +from mindspore import ops +from mindspore.dataset import transforms, vision +import numpy as np + + +class CustomData: + """CustomData class, which is used to process data.""" + def __init__(self, data_dir, dataset_name, number_data_points): + self.data_dir = data_dir + self.dataset_name = dataset_name + self.num_data = number_data_points + self.extract_mean_std() + + def get_data_cfg(self): + with hydra.initialize(config_path='breaching/config/case/data', version_base='1.1'): + cfg = hydra.compose(config_name=self.dataset_name) + return cfg + + def extract_mean_std(self): + cfg = self.get_data_cfg() + self.mean = ms.Tensor(list(cfg.mean))[None, :, None, None] + self.std = ms.Tensor(list(cfg.std))[None, :, None, None] + + def process_data(self): + """process data.""" + trans = transforms.Compose( + [ + vision.Resize(size=(224)), + vision.Rescale(1.0 / 255.0, 0), + vision.HWC2CHW(), + ] + ) + file_name_li = os.listdir(self.data_dir) + file_name_list = sorted(file_name_li, key=lambda x: int(x.split('-')[0])) + assert len(file_name_list) >= int(self.num_data) + imgs = [] + labels_ = [] + random.shuffle(file_name_list) + for file_name in file_name_list[0:int(self.num_data)]: + img = Image.open(self.data_dir+file_name).convert("RGB") + tmp_img = ms.Tensor(trans(img)[0]) + imgs.append(tmp_img[None, :]) + label = int(file_name.split('-')[0]) + labels_.append(label) + imgs = ops.concat(imgs, 0) + labels = ms.Tensor(labels_) + inputs = (imgs-self.mean)/self.std + return dict(inputs=inputs, labels=labels) + + def save_recover(self, recover, original=None, save_pth='', sature=False): + """save recovered data.""" + if original is not None: + if isinstance(recover, dict): + recover_imgs = ops.clip_by_value(recover['data']*self.std+self.mean, 0, 1) + if sature: + recover_imgs = vision.AdjustSaturation(saturation_factor=sature)(recover_imgs) + origina_imgs = 
ops.clip_by_value(original['data'] * self.std + self.mean, 0, 1) + all_img = ops.concat([recover_imgs, origina_imgs], 2) + else: + recover_imgs = ops.clip_by_value(recover * self.std + self.mean, 0, 1) + if sature: + recover_imgs = vision.AdjustSaturation(saturation_factor=sature)(recover_imgs) + origina_imgs = ops.clip_by_value(original['data'] * self.std + self.mean, 0, 1) + all_img = ops.concat([recover_imgs, origina_imgs], 2) + else: + if isinstance(recover, dict): + recover_imgs = ops.clip_by_value(recover['data'] * self.std + self.mean, 0, 1) + if sature: + recover_imgs = vision.AdjustSaturation(saturation_factor=sature)(recover_imgs) + all_img = recover_imgs + else: + recover_imgs = ops.clip_by_value(recover * self.std + self.mean, 0, 1) + if sature: + recover_imgs = vision.AdjustSaturation(saturation_factor=sature)(recover_imgs) + all_img = recover_imgs + self.save_array_img(all_img, save_pth) + + def save_array_img(self, img_4d, save_pth): + # img_4d: msTensor n*3*H*W, 0-1 + imgs = img_4d.asnumpy()*225 + all_imgs = [im for im in imgs] + tmp1 = np.concatenate(all_imgs, axis=-1) + tmp2 = np.uint8(np.transpose(tmp1, [1, 2, 0])) + Image.fromarray(tmp2, mode='RGB').save(save_pth) diff --git a/examples/privacy/gradient_leakage_attack/launch_attack.py b/examples/privacy/gradient_leakage_attack/launch_attack.py new file mode 100644 index 0000000000000000000000000000000000000000..e2b7a9d85ec09568302f7f0997310d4a671cdf48 --- /dev/null +++ b/examples/privacy/gradient_leakage_attack/launch_attack.py @@ -0,0 +1,100 @@ +"""launch gradient inversion attacks with different configs.""" +import breaching +import mindspore as ms +from custom_dataset import CustomData + + +def reset_cfg_by_client(cfg, args): + """reset default configs by input parameters.""" + cfg.case.user.num_data_points = int(args['custom_parameter']['num_data_points']) + cfg.attack.optim.max_iterations = int(args['custom_parameter']['max_iterations']) + cfg.attack.optim.step_size = 
def reset_cfg_by_client(cfg, args):
    """Override the default attack/case configs with user-supplied parameters."""
    custom = args['custom_parameter']
    cfg.case.user.num_data_points = int(custom['num_data_points'])
    cfg.attack.optim.max_iterations = int(custom['max_iterations'])
    cfg.attack.optim.step_size = float(custom['step_size'])
    cfg.attack.regularization.total_variation.scale = float(custom['TV_scale'])
    cfg.attack.regularization.total_variation.tv_start = int(custom['TV_start'])
    cfg.attack.optim.callback = int(custom['callback'])
    # InvGrad has no deep-inversion (BN) regularizer in its attack config.
    if 'invgrad' not in args['alg_name'].lower():
        cfg.attack.regularization.deep_inversion.scale = float(custom['BN_scale'])
        cfg.attack.regularization.deep_inversion.deep_inv_start = int(custom['BN_start'])


def get_cfg(args):
    """Map dataset/algorithm/model names onto breaching hydra overrides.

    Raises:
        ValueError: if the dataset or algorithm name is not supported.
    """
    dataset = args['dataset'].lower()
    if 'cifar' in dataset:
        case_ = "case=11_small_batch_cifar"
    elif 'tiny' in dataset:
        case_ = "case=13_custom_tinyimagenet"
    elif 'web' in dataset:
        case_ = "case=12_custom_imagenet"
    else:
        raise ValueError(f"Do not support dataset: {args['dataset']}!")
    alg = args['alg_name'].lower()
    # The seethrough/stepwise attack configs come in a per-backbone variant.
    suffix = "res18" if "resnet18" in args['model'].lower() else "res34"
    if 'invgrad' in alg:
        attack_ = "attack=invertinggradients"
    elif 'seethrough' in alg:
        attack_ = f"attack=seethrough_{suffix}"
    elif 'stepwise' in alg:
        attack_ = f"attack=stepwise_{suffix}"
    else:
        raise ValueError(f"Do not support algorithm: {args['alg_name']}!")
    return breaching.get_config(overrides=[case_, attack_])


def run_attack(args):
    """Construct the federated case from configs, run one gradient-inversion
    reconstruction, and save ground-truth and reconstructed image grids."""
    try:
        ms.set_context(device_target="GPU", device_id=0)
        print('using mindspore with GPU context')
    except ValueError:
        ms.set_context(device_target="CPU")
        print('using mindspore with CPU context')

    cfg = get_cfg(args)
    data_dir = './custom_data/1_img/'

    cfg.attack.save_dir = args['out_put']
    cfg.case.data.path = args['data_path']

    cfg.case.data.partition = 'balanced'
    cfg.case.data.smooth = 0
    cfg.case.user.user_idx = 0
    cfg.case.model = args['model']

    cfg.case.user.provide_labels = False
    cfg.case.user.provide_buffers = False
    cfg.case.server.provide_public_buffers = True
    cfg.case.server.pretrained = False

    # Apply the user-supplied overrides on top of the defaults.
    reset_cfg_by_client(cfg, args)
    defense = args['custom_parameter']['defense']
    # Typo fix (was `sercure_input`); each flag enables exactly one defense.
    secure_input = defense == 'Vicinal Augment'
    apply_noise = defense == 'Differential Privacy'
    apply_prune = defense == 'Gradient Prune'

    user, server = breaching.cases.construct_case(cfg.case)
    attacker = breaching.attacks.prepare_attack(server.model, server.loss, cfg.attack)
    attacker.txt_path = args['out_put'] + args['eva_txt_file']
    server_payload = server.distribute_payload()

    if 'web' not in args['dataset'].lower():
        cus_data = CustomData(data_dir=data_dir, dataset_name=args['dataset'],
                              number_data_points=cfg.case.user.num_data_points)
        shared_data, true_user_data = user.compute_local_updates(
            server_payload, secure_input=secure_input,
            apply_noise=apply_noise, apply_prune=apply_prune)
    else:
        # 'WebImage' reuses ImageNet statistics and feeds the custom images.
        cus_data = CustomData(data_dir=data_dir, dataset_name='ImageNet',
                              number_data_points=cfg.case.user.num_data_points)
        shared_data, true_user_data = user.compute_local_updates(
            server_payload, custom_data=cus_data.process_data(),
            secure_input=secure_input,
            apply_noise=apply_noise, apply_prune=apply_prune)

    # Save the ground truth first, so a failed attack still leaves A_0.png.
    true_pat = cfg.attack.save_dir + 'A_0.png'
    cus_data.save_recover(true_user_data, save_pth=true_pat)

    # ---------------attack part---------------------
    reconstructed_user_data = attacker.reconstruct([server_payload], [shared_data], {}, custom=cus_data)
    recon_path__ = cfg.attack.save_dir + f'A_{attacker.save_flag}.png'
    cus_data.save_recover(reconstructed_user_data, true_user_data, recon_path__)
def arg_parse(argv=None):
    """Parse command-line arguments for the gradient-inversion demo.

    Args:
        argv: optional list of argument strings; ``None`` (the default, and
            the previous behavior) reads ``sys.argv[1:]``. Passing a list
            makes the parser unit-testable.

    Returns:
        argparse.Namespace with all options plus ``eva_txt_file``.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('--out_put', type=str, default='output/', help='directory for save outputs')
    parse.add_argument('--data_path', type=str, default='./data', help='path of dataset')
    parse.add_argument('--dataset', type=str, default="WebImage", choices=['TinyImageNet', 'CIFAR100', 'WebImage'])
    parse.add_argument('--model', type=str, default="resnet34", choices=['resnet18', 'resnet34'])
    parse.add_argument('--alg_name', type=str, default="StepWise", choices=['InvGrad', 'SeeThrough', 'StepWise'])
    parse.add_argument('--defense', type=str, default="None", choices=['None', 'Vicinal Augment',
                                                                       'Differential Privacy', 'Gradient Prune'])
    parse.add_argument('--num_data_points', type=int, default=4)
    parse.add_argument('--max_iterations', type=int, default=7000)
    parse.add_argument('--step_size', type=float, default=0.1)
    parse.add_argument('--TV_scale', type=float, default=0.002)
    parse.add_argument('--TV_start', type=int, default=2000)
    parse.add_argument('--BN_scale', type=float, default=0.0001)
    parse.add_argument('--BN_start', type=int, default=3000)
    parse.add_argument('--callback', type=int, default=100)

    args_ = parse.parse_args(argv)
    # Name of the text file that collects per-run evaluation metrics.
    args_.eva_txt_file = 'resultTXT.txt'

    return args_


def run_main(args_):
    """Convert the parsed namespace into the dict layout run_attack expects,
    create the output directory, and launch the attack."""
    arg_dic = copy.deepcopy(vars(args_))
    # run_attack reads the tuning knobs from a nested 'custom_parameter' dict.
    arg_dic['custom_parameter'] = {}
    for key in vars(args_):
        arg_dic['custom_parameter'][key] = getattr(args_, key)
    # makedirs(exist_ok=True): creates nested paths and avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(arg_dic['out_put'], exist_ok=True)
    run_attack(arg_dic)
    print('Running over! Check the output in directory: ' + arg_dic['out_put'])


if __name__ == '__main__':
    args = arg_parse()
    run_main(args)