-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathattack.py
More file actions
160 lines (135 loc) · 5.72 KB
/
attack.py
File metadata and controls
160 lines (135 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import torch
import numpy as np
from typing import Callable, Iterable, Optional
def attack(model: Callable,
           s_models: Iterable[torch.nn.Module],
           attacks,
           image: np.ndarray,
           label: int,
           targeted: bool,
           device: Optional[torch.device] = None):
    """ Run a black-box attack on 'model', using surrogate models 's_models'
    and attackers 'attacks'

    For each surrogate model and attacker, we create an attack against the
    surrogate model, and use the resulting direction to create an attack
    against the defense ('model'). This is done with a coarse bound search
    followed by a binary search along this direction.

    Parameters
    ----------
    model : Callable
        The model under attack. Should be a function that takes an image
        x (H x W x C) with pixels from [0, 255] and returns the label (int)
    s_models : List of torch models
        The surrogate models. Each model should be a PyTorch nn.Module, that
        takes an input x (B x C x H x W) with pixels from [0, 1], and returns
        the pre-softmax activations (logits).
    attacks : List of attack functions
        List of attacks. Each attack should have a method as follows:
            attack(model, inputs, labels, targeted) -> adv_image
    image : np.ndarray
        An image (H x W x C) with pixels ranging from [0, 255]
    label : int
        The true label (if targeted=False) or target label (if targeted=True)
    targeted : bool
        Whether to run a targeted (True) or an untargeted (False) attack
    device : torch.device, optional
        Which device to use for the attacks. Defaults to CUDA when available,
        CPU otherwise.

    Returns
    -------
    np.ndarray:
        The best (smallest L2 perturbation) adversarial image found against
        'model'. The input image itself if it is already adversarial. None
        if no adversarial is found.
    """
    original_label = model(image)
    if targeted:
        already_adversarial = original_label == label
    else:
        already_adversarial = original_label != label
    if already_adversarial:
        return image

    adversarial = None
    # Norm of the largest perturbation that keeps every pixel in [0, 255];
    # any candidate must beat this to be accepted.
    best_norm = np.linalg.norm(np.maximum(255 - image, image))

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # H x W x C in [0, 255]  ->  1 x C x H x W in [0, 1]
    t_image = torch.tensor(image).float().div(255).permute(2, 0, 1)
    t_image = t_image.unsqueeze(0).to(device)
    t_label = torch.tensor(label, device=device).unsqueeze(0)

    for s_m in s_models:
        for attacker in attacks:
            adv_img = attacker.attack(s_m, t_image, t_label, targeted)
            # detach() so that an output still attached to the autograd
            # graph can be converted to numpy.
            adv_img = adv_img.squeeze(0).detach()
            delta = adv_img.permute(1, 2, 0).cpu().numpy() * 255 - image
            delta = np.round(delta)

            # Skip directions with no usable perturbation (zero or NaN norm)
            if not np.linalg.norm(delta) > 0:
                continue

            # Run bound search
            lower, upper, found = bound_search(model, image,
                                               label, delta,
                                               targeted=targeted)
            if found:
                norm = np.linalg.norm(upper)
                if norm < best_norm:
                    adversarial = upper + image
                    best_norm = norm

            # Run binary search. This also runs when the bound search did
            # not find the boundary, using the bounds it returned.
            upper_, found_ = binary_search(model, image, label,
                                           lower, upper, steps=10,
                                           targeted=targeted)
            if found_:
                norm = np.linalg.norm(upper_)
                if norm < best_norm:
                    adversarial = upper_ + image
                    best_norm = norm

    return adversarial
def bound_search(model, image, label, delta, alpha=1, iters=9, targeted=False):
    """ Coarse search for the decision boundary in direction delta

    Starting from 'delta', the perturbation is repeatedly scaled by
    (1 + alpha) (growing it while image + delta is not adversarial, shrinking
    it while it is) until two consecutive perturbations fall on opposite
    sides of the decision boundary of 'model'.

    Parameters
    ----------
    model : Callable
        Function that takes an image and returns its label
    image : np.ndarray
        The original image, with pixels ranging from [0, 255]
    label : int
        The true label (if targeted=False) or target label (if targeted=True)
    delta : np.ndarray
        Initial perturbation (same shape as 'image') giving the direction
    alpha : float
        The perturbation is multiplied / divided by (1 + alpha) at each step
    iters : int
        Maximum number of scaling steps
    targeted : bool
        Whether the attack is targeted

    Returns
    -------
    lower : np.ndarray
        Perturbation for which image + lower is not adversarial
    upper : np.ndarray
        Perturbation for which image + upper is adversarial
    found : bool
        True if the boundary was bracketed within 'iters' steps. When False,
        'lower' is all zeros and 'upper' is 'delta' rescaled to a large
        perturbation in the same direction (not checked against 'model').
    """
    def out_of_region(perturbation):
        # Returns whether or not image+perturbation is outside the desired
        # region (e.g. inside the class boundary for untargeted, outside the
        # target class for targeted)
        if targeted:
            return model(image + perturbation) != label
        else:
            return model(image + perturbation) == label

    if out_of_region(delta):
        # Not adversarial yet: increase the noise
        lower = delta
        upper = np.clip(image + np.round(delta * (1 + alpha)), 0, 255) - image
        for _ in range(iters):
            if out_of_region(upper):
                lower = upper
                adv = image + np.round(upper * (1 + alpha))
                upper = np.clip(adv, 0, 255) - image
            else:
                return lower, upper, True
    else:
        # Inside the region of interest. Decrease the noise
        upper = delta
        lower = np.clip(image + np.round(delta / (1 + alpha)), 0, 255) - image
        for _ in range(iters):
            if not out_of_region(lower):
                upper = lower
                adv = image + np.round(lower / (1 + alpha))
                lower = np.clip(adv, 0, 255) - image
            else:
                return lower, upper, True

    # Boundary not bracketed: return the widest bounds along the direction.
    # Scale by the largest positive component; when the direction has no
    # positive component, scale by the largest magnitude instead so the
    # direction is neither sign-flipped nor divided by zero.
    scale = delta.max()
    if scale <= 0:
        scale = np.abs(delta).max()
    if scale == 0:
        # Zero direction: there is nothing to scale
        return np.zeros_like(delta), np.zeros_like(delta), False
    return np.zeros_like(delta), np.round(delta / scale * 255), False
def binary_search(model, image, label, lower, upper, steps=10, targeted=False):
    """ Refine the decision boundary between two perturbations by bisection

    At every step the rounded midpoint of 'lower' and 'upper' is clipped so
    that image + midpoint stays in [0, 255], and 'model' is queried on it.
    A non-adversarial midpoint replaces 'lower'; an adversarial one replaces
    'upper'.

    Parameters
    ----------
    model : Callable
        Function that takes an image and returns its label
    image : np.ndarray
        The original image, with pixels ranging from [0, 255]
    label : int
        The true label (if targeted=False) or target label (if targeted=True)
    lower, upper : np.ndarray
        Perturbations bracketing the search interval
    steps : int
        Number of bisection steps (one model query each)
    targeted : bool
        Whether the attack is targeted

    Returns
    -------
    upper : np.ndarray
        The last adversarial midpoint, or the 'upper' argument unchanged if
        no midpoint was adversarial
    found : bool
        True if at least one midpoint was adversarial
    """
    hit = False
    for _ in range(steps):
        halfway = np.round((lower + upper) / 2)
        midpoint = np.clip(image + halfway, 0, 255) - image

        prediction = model(image + midpoint)
        # "Outside" the desired region means: still the original class for an
        # untargeted attack, or not yet the target class for a targeted one.
        outside = (prediction != label) if targeted else (prediction == label)

        if outside:
            lower = midpoint
        else:
            upper, hit = midpoint, True
    return upper, hit