-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathestimateVC.py
More file actions
122 lines (101 loc) · 4.7 KB
/
estimateVC.py
File metadata and controls
122 lines (101 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import math
import random
from math import comb
def random_labeled_sample(draw_random_example, sample_size):
return [(draw_random_example(), random.randint(0, 1)) for _ in range(sample_size)]
def estimate_can_shatter_v2(draw_random_example, can_induce, sample_size, num_trials, num_labelings=250):
for _ in range(num_trials):
unlabeled_sample = random_unlabeled_sample(draw_random_example, sample_size)
success = False
for _ in range(num_labelings):
labeled_sample = [(x, random.randint(0, 1)) for x in unlabeled_sample]
if not can_induce(labeled_sample):
success = False
break
success = True
if success:
return True
return False
def estimate_can_shatter(draw_random_example, can_induce, sample_size, num_trials):
"""
Estimates whether a concept class can shatter some sample of a certain size
:param draw_random_example: A function that returns a random example from any arbitrary distribution
:param can_induce: A function that takes in a labeled sample and returns whether the concept class of interest
can produce that labeling on that sample.
:param sample_size: The sample size to test
:param num_trials: How many randomized trials to run
:return:
"""
for _ in range(num_trials):
labeled_sample = random_labeled_sample(draw_random_example, sample_size)
if not can_induce(labeled_sample):
return False
return True
def phi(d, m):
return sum(comb(m, i) for i in range(d + 1))
def verify_saeur(draw_random_example, can_induce, vcd, num_trials):
"""
Checks whether phi(d, m) is too small for the given vc dimension. If it is, return that the proposed
vcd is inconsistent with Sauer's lemma.
:param draw_random_example: Function for returning a random example
:param can_induce: Function that takes in a labeled sample and returns whether that labeling can be induced
:param vcd: The proposed vc-dimension
:param num_trials: The number of trials to use for estimation
:return:
"""
total_labelings_induced = 0
sample_size = vcd * 2
for _ in range(num_trials):
labeled_sample = random_labeled_sample(draw_random_example, sample_size)
if can_induce(labeled_sample):
total_labelings_induced += 1
if total_labelings_induced == 0:
return False
est_labels_induced = (total_labelings_induced / num_trials) * (2 ** sample_size)
return est_labels_induced >= phi(vcd, sample_size)
def estimate_vc_bin_search(draw_random_example, can_induce, consistent_vc):
"""
A meta-algorithm for estimating the VC dimension
:param draw_random_example: Draw a random example from the distribution
:param can_induce: Function that takes in a labeled sample and returns whether that labeling can be induced
:param consistent_vc: A function that uses some metric to determine whether a given vc dimension is consistent
with empirical data.
:return: An estimate of the VC dimension
"""
# estimate a lower and upper bound on the VC dimension before performing a binary search for the VC dimension
lower_bound = 1
upper_bound = 1
while consistent_vc(draw_random_example, can_induce, upper_bound):
lower_bound = upper_bound
upper_bound *= 2
# perform a binary search for the estimated VC dimension
while abs(lower_bound - upper_bound) > 1:
middle = (lower_bound + upper_bound) // 2
if consistent_vc(draw_random_example, can_induce, middle):
lower_bound = middle
else:
upper_bound = middle
if not consistent_vc(draw_random_example, can_induce, upper_bound):
return lower_bound
return upper_bound
def estimate_vc_shattering(draw_random_example, can_induce, num_trials=500):
"""
Estimates the VC dimension based on an estimate of how many examples can be shattered
:param draw_random_example:
:param can_induce:
:param num_trials:
:return:
"""
consistent_vc = lambda draw, check, sample_size: estimate_can_shatter_v2(draw, check, sample_size, num_trials)
return estimate_vc_bin_search(draw_random_example, can_induce, consistent_vc)
def estimate_vc_sauer(draw_random_example, can_induce, num_trials=2000):
"""
Estimates the VC dimension based on how closely the estimated number of labellings that can be induced matches
Sauer's lemma
:param draw_random_example:
:param can_induce:
:param num_trials:
:return:
"""
consistent_vc = lambda draw, check, vc: verify_saeur(draw, check, vc, num_trials)
return estimate_vc_bin_search(draw_random_example, can_induce, consistent_vc)