forked from sommerda/privacybuckets
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample_composition.py
136 lines (109 loc) · 5.5 KB
/
example_composition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# written by David Sommer (david.sommer at inf.ethz.ch)
import traceback
import numpy as np
from core.probabilitybuckets_light import ProbabilityBuckets
from matplotlib import pyplot as plt
####
# Privacy Buckets is based on the following publication
####
# [*] S. Meiser, E. Mohammadi,
# [*] "Tight on Budget? Tight Bounds for r-Fold Approximate Differential Privacy",
# [*] Proceedings of the 25th ACM Conference on Computer and Communications Security (CCS), 2018
# We examine Extended Randomized Repsonse distributions defined as follows
delta=0.0001
eps_rr = 0.01
expeps = np.exp(eps_rr)
norm = (1. + expeps + delta)
distribution_A = np.array([0, 1./norm, expeps/norm, delta])
distribution_B = np.array([delta, expeps/norm, 1./norm, 0])
# Important: the individual input distributions neede each to sum up to 1 exactly!
distribution_A = distribution_A/np.sum(distribution_A)
distribution_B = distribution_B/np.sum(distribution_B)
### Choosing the number of buckets.
#
# We distribute the elements of distribution_A in the buckets according to the privacy loss L_A/B.
# Higher number-of_buckets allows more finegrade composition. The runtime if composition is quadratic in the number of buckets.
# A good value to start with is
number_of_buckets = 100000
# It is more beneficial to adapt the factor (see below) than the number-of-buckets.
# Once you understand how it influcences the accuracy, change it to any value you like and your hardware supports.
### Choosing the factor
#
# The factor is the average additive distance between the privacy-losses that are put in neighbouring buckets.
# You want to put most of you probablity mass of distribution_A in the buckets, as the rest gets put into the
# infitity_bucket and the minus_n-bucket. Therefore for an o
#
# L_A/B(o) = log(factor) * number_of_buckets / 2
#
# then the mass Pr[ L_A/B(o) > mass_infinity_bucket, o <- distribution_A] will be put into the infinity bucket.
# The infinity-bucket gros exponentially with the number of compositions. Chose the factor according to the
# probability mass you want to tolerate in the inifity bucket. For this example, the minimal factor should be
#
# log(factor) > eps
#
# as for randomized response, there is no privacy loss L_A/B greater than epsilon (excluding delta/infinity-bucket).
# We set the factor to
factor = 1 + 1e-4
# Initialize privacy buckets.
privacybuckets = ProbabilityBuckets(
number_of_buckets=number_of_buckets,
factor=factor,
dist1_array=distribution_A, # distribution A
dist2_array=distribution_B, # distribution B
caching_directory="./pb-cache", # caching makes re-evaluations faster. Can be turned off for some cases.
free_infty_budget=10**(-20), # how much we can put in the infty bucket before first squaring
error_correction=True, # error correction. See publication for details
)
# Now we evaluate how the distributon looks after 2**k independent compositions
k = 13
# input can be arbitrary positive integer, but exponents of 2 are numerically the most stable
privacybuckets_composed = privacybuckets.compose(2**k)
# Print status summary
privacybuckets_composed.print_state()
# Now we build the delta(eps) graphs from the computed distribution.
eps_vector = np.linspace(0, 3, 100)
upper_bound_delta = [privacybuckets_composed.delta_ADP_upper_bound(eps) for eps in eps_vector]
lower_bound_delta = [privacybuckets_composed.delta_ADP_lower_bound(eps) for eps in eps_vector]
plt.plot(eps_vector, upper_bound_delta, label="upper_bound")
plt.plot(eps_vector, lower_bound_delta, label="lower_bound")
plt.legend()
plt.title("Extended Randomized response with eps={:e}, delta={:f} after {:d} compositions".format(eps_rr, delta, 2**k))
plt.xlabel("eps")
plt.ylabel("delta")
plt.ticklabel_format(useOffset=False) # Hotfix for the behaviour of my current matplotlib version
plt.show()
# we can ask for a specific epsilon (upper bound) for a given delta directly.
# for illustration purposes, we do not just switch axis of the previous plot but call also the corresponding method.
delta_vector = np.linspace(0.75, 0.57, 100)
upper_bound_eps = [privacybuckets_composed.eps_ADP_upper_bound(delta) for delta in delta_vector]
plt.plot(delta_vector, upper_bound_eps, '--', alpha=0.5, label="upper_bound (class method)")
plt.plot(upper_bound_delta, eps_vector, alpha=0.5, label="upper_bound (axis switch of previous plot)")
plt.legend()
plt.title("Extended Randomized response with eps={:e}, delta={:f} after {:d} compositions".format(eps_rr, delta, 2**k))
plt.xlabel("delta")
plt.ylabel("eps")
plt.ticklabel_format(useOffset=False) # Hotfix for the behaviour of my current matplotlib version
plt.show()
# Illustration what happens when we call ProbabilityBuckets.eps_ADP_upper_bound(delta) with a unsuitable delta
try:
eps = privacybuckets_composed.eps_ADP_upper_bound(delta=.5)
except ValueError as e:
print("")
print("[*] THIS IS AN INTENTIONALLY INDUCED ERROR FOR ILLUSTRATION PURPOSES!")
print(" (target delta was too small.)")
print("")
traceback.print_exc()
print("")
print("[*] THIS WAS AN INTENTIONALLY INDUCED ERROR FOR ILLUSTRATION PURPOSES!")
print(" (target delta was too small.)")
print("")
print("[*] Here, the target delta is too big.")
eps = privacybuckets_composed.eps_ADP_upper_bound(delta=0.9)
print(f"for delta=0.9: eps={eps}")
print("")
# abusing internals, we can look at the bucket distribution
plt.plot(privacybuckets_composed.bucket_distribution)
plt.title("bucket distribution")
plt.xlabel("bucket number")
plt.ylabel("mass")
plt.show()