diff --git a/opacus/optimizers/adaclipoptimizer.py b/opacus/optimizers/adaclipoptimizer.py index 9498613d..0b51c6ca 100644 --- a/opacus/optimizers/adaclipoptimizer.py +++ b/opacus/optimizers/adaclipoptimizer.py @@ -64,18 +64,23 @@ def __init__( generator=generator, secure_mode=secure_mode, ) - assert ( - max_clipbound > min_clipbound - ), "max_clipbound must be larger than min_clipbound." + if max_clipbound <= min_clipbound: + raise ValueError("max_clipbound must be larger than min_clipbound.") + if noise_multiplier >= 2 * unclipped_num_std: + raise ValueError( + "noise_multiplier must be smaller than 2 * unclipped_num_std. This is a requirement stemming from Theorem 1 in https://arxiv.org/pdf/1905.03871.pdf" + ) + self.target_unclipped_quantile = target_unclipped_quantile self.clipbound_learning_rate = clipbound_learning_rate self.max_clipbound = max_clipbound self.min_clipbound = min_clipbound self.unclipped_num_std = unclipped_num_std # Theorem 1. in https://arxiv.org/pdf/1905.03871.pdf - self.noise_multiplier = ( - self.noise_multiplier ** (-2) - (2 * unclipped_num_std) ** (-2) - ) ** (-1 / 2) + if self.noise_multiplier > 0: # if noise_multiplier = 0 then it stays zero + self.noise_multiplier = ( + self.noise_multiplier ** (-2) - (2 * unclipped_num_std) ** (-2) + ) ** (-1 / 2) self.sample_size = 0 self.unclipped_num = 0 diff --git a/opacus/privacy_engine.py b/opacus/privacy_engine.py index cacacee0..2a2b205a 100644 --- a/opacus/privacy_engine.py +++ b/opacus/privacy_engine.py @@ -309,7 +309,10 @@ def make_private( noise_generator=None, grad_sample_mode: str = "hooks", **kwargs, - ) -> Tuple[GradSampleModule, DPOptimizer, DataLoader]: + ) -> Union[ + Tuple[GradSampleModule, DPOptimizer, DataLoader], + Tuple[GradSampleModule, DPOptimizer, DPLossFastGradientClipping, DataLoader], + ]: """ Add privacy-related responsibilities to the main PyTorch training objects: model, optimizer, and the data loader. @@ -319,6 +322,7 @@ def make_private( - Model is wrapped to also compute per sample gradients. - Optimizer is now responsible for gradient clipping and adding noise to the gradients. + - Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. - DataLoader is updated to perform Poisson sampling. Notes: @@ -359,12 +363,14 @@ def make_private( details Returns: - Tuple of (model, optimizer, data_loader). + Tuple of (model, optimizer, data_loader) or (model, optimizer, criterion, data_loader). Model is a wrapper around the original model that also computes per sample gradients Optimizer is a wrapper around the original optimizer that also does gradient clipping and noise addition to the gradients + Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. + Only returned when grad_sample_mode is "ghost". DataLoader is a brand new DataLoader object, constructed to behave as equivalent to the original data loader, possibly with updated sampling mechanism. Points to the same dataset object. @@ -453,7 +459,10 @@ def make_private_with_epsilon( noise_generator=None, grad_sample_mode: str = "hooks", **kwargs, - ): + ) -> Union[ + Tuple[GradSampleModule, DPOptimizer, DataLoader], + Tuple[GradSampleModule, DPOptimizer, DPLossFastGradientClipping, DataLoader], + ]: """ Version of :meth:`~opacus.privacy_engine.PrivacyEngine.make_private`, that calculates privacy parameters based on a given privacy budget. @@ -497,12 +506,14 @@ def make_private_with_epsilon( details Returns: - Tuple of (model, optimizer, data_loader). + Tuple of (model, optimizer, data_loader) or (model, optimizer, criterion, data_loader). Model is a wrapper around the original model that also computes per sample gradients Optimizer is a wrapper around the original optimizer that also does gradient clipping and noise addition to the gradients + Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. + Only returned when grad_sample_mode is "ghost". DataLoader is a brand new DataLoader object, constructed to behave as equivalent to the original data loader, possibly with updated sampling mechanism. Points to the same dataset object.