feat: hw2
This commit is contained in:
parent
2cb1264cd0
commit
d50a247499
BIN
hw2/hw2_10.png
Normal file
BIN
hw2/hw2_10.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 27 KiB |
54
hw2/hw2_10.py
Normal file
54
hw2/hw2_10.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
def generate_data(length, noise_prob, rng=None):
    """Draw a sorted 1-D sample with noisy sign labels.

    Args:
        length: Number of points to draw.
        noise_prob: Each label is flipped when an independent uniform [0, 1)
            draw is <= this value.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) to draw
            from, for reproducible experiments. When omitted, the global
            ``np.random`` state is used, exactly as before.

    Returns:
        Tuple ``(x, y)``: ``x`` holds the points drawn uniformly from
        [-1, 1) in ascending order; ``y`` holds ``sign(x)`` with the selected
        labels negated.
    """
    # The legacy module and Generator objects both expose uniform()/random(),
    # so a single code path serves the default and the injected source.
    source = np.random if rng is None else rng

    x = np.sort(source.uniform(-1, 1, (length,)))
    y = np.sign(x)

    # Draw the flip mask after x so the default path consumes the global
    # random stream in the same order as the original implementation.
    flip = source.random(length) <= noise_prob
    y[flip] *= -1
    return x, y
|
||||||
|
|
||||||
|
def decision_stump(x, y):
    """Find the 1-D decision stump with the lowest in-sample error.

    Hypotheses have the form ``h(x) = s * sign(x - theta)`` with ``s`` in
    {+1, -1}. Candidate thresholds are -1 plus the midpoints between
    neighbouring sample points.

    Args:
        x: 1-D array of sample points. It does not need to be sorted; the
            candidate thresholds are built from a sorted copy.
        y: 1-D array of labels aligned with ``x``.

    Returns:
        Tuple ``(Ein, theta, s)``: the best in-sample error rate (fraction of
        misclassified points), and the threshold and direction achieving it.
        Ties keep the first candidate found (thresholds in ascending order);
        for a tie between directions at the same threshold, ``s = -1`` is
        chosen.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Midpoints are only meaningful between *adjacent* points, so take them
    # from a sorted copy instead of trusting the caller's ordering.
    ordered = np.sort(x)
    theta_seq = np.concatenate(([-1.0], (ordered[:-1] + ordered[1:]) / 2))

    best_Ein = np.inf
    theta_ans = 0
    sign_ans = 0

    for theta in theta_seq:
        # Broadcasting subtracts the scalar threshold from every sample.
        positive_pred = np.sign(x - theta)
        errors_pos = (positive_pred != y).sum()   # s = +1
        errors_neg = (-positive_pred != y).sum()  # s = -1

        # Strict '<' keeps the earliest threshold when error counts tie.
        if min(errors_pos, errors_neg) < best_Ein:
            best_Ein = min(errors_pos, errors_neg)
            theta_ans = theta
            sign_ans = 1 if errors_pos < errors_neg else -1

    return best_Ein / x.shape[0], theta_ans, sign_ans
|
||||||
|
|
||||||
|
# Experiment: repeatedly fit a decision stump on a fresh noisy sample, record
# its in-sample and out-of-sample error, then plot Eout against Ein and report
# the median of (Eout - Ein).
NUM_TRIALS = 2000
SAMPLE_SIZE = 32
NOISE_PROB = 0.1

# For x ~ U[-1, 1] and y = sign(x) flipped with probability NOISE_PROB, the
# stump h(x) = s * sign(x - theta) has the closed-form out-of-sample error
#     Eout = 0.5 - k*s + k*s*|theta|,  with k = 0.5 - NOISE_PROB.
# Deriving k from NOISE_PROB keeps the formula in sync with the data generator.
EOUT_SLOPE = 0.5 - NOISE_PROB

Ein_log, Eout_log = [], []
for _ in range(NUM_TRIALS):
    x, y = generate_data(SAMPLE_SIZE, NOISE_PROB)
    Ein, theta, sign = decision_stump(x, y)
    Ein_log.append(Ein)
    Eout_log.append(0.5 - EOUT_SLOPE * sign + EOUT_SLOPE * sign * abs(theta))

# np.median averages the two middle values for an even number of trials, so
# no indices tied to NUM_TRIALS are needed.
gap = np.sort(np.asarray(Eout_log) - np.asarray(Ein_log))
median = np.median(gap)

plt.scatter(Ein_log, Eout_log)
plt.xlabel("Ein")
plt.ylabel("Eout")
plt.title("median: {}".format(median))
# Save before show(): show() blocks until the window is closed.
plt.savefig("hw2_10.png")
plt.show()
|
||||||
BIN
hw2/hw2_11.png
Normal file
BIN
hw2/hw2_11.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 23 KiB |
55
hw2/hw2_11.py
Normal file
55
hw2/hw2_11.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
def generate_data(length, noise_prob, rng=None):
    """Draw a sorted 1-D sample with noisy sign labels.

    Args:
        length: Number of points to draw.
        noise_prob: Each label is flipped when an independent uniform [0, 1)
            draw is <= this value.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) to draw
            from, for reproducible experiments. When omitted, the global
            ``np.random`` state is used, exactly as before.

    Returns:
        Tuple ``(x, y)``: ``x`` holds the points drawn uniformly from
        [-1, 1) in ascending order; ``y`` holds ``sign(x)`` with the selected
        labels negated.
    """
    # The legacy module and Generator objects both expose uniform()/random(),
    # so a single code path serves the default and the injected source.
    source = np.random if rng is None else rng

    x = np.sort(source.uniform(-1, 1, (length,)))
    y = np.sign(x)

    # Draw the flip mask after x so the default path consumes the global
    # random stream in the same order as the original implementation.
    flip = source.random(length) <= noise_prob
    y[flip] *= -1
    return x, y
|
||||||
|
|
||||||
|
def decision_stump(x, y):
    """Find the 1-D decision stump with the lowest in-sample error.

    Hypotheses have the form ``h(x) = s * sign(x - theta)`` with ``s`` in
    {+1, -1}. Candidate thresholds are -1 plus the midpoints between
    neighbouring sample points.

    Args:
        x: 1-D array of sample points. It does not need to be sorted; the
            candidate thresholds are built from a sorted copy.
        y: 1-D array of labels aligned with ``x``.

    Returns:
        Tuple ``(Ein, theta, s)``: the best in-sample error rate (fraction of
        misclassified points), and the threshold and direction achieving it.
        Ties keep the first candidate found (thresholds in ascending order);
        for a tie between directions at the same threshold, ``s = -1`` is
        chosen.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Midpoints are only meaningful between *adjacent* points, so take them
    # from a sorted copy instead of trusting the caller's ordering.
    ordered = np.sort(x)
    theta_seq = np.concatenate(([-1.0], (ordered[:-1] + ordered[1:]) / 2))

    best_Ein = np.inf
    theta_ans = 0
    sign_ans = 0

    for theta in theta_seq:
        # Broadcasting subtracts the scalar threshold from every sample.
        positive_pred = np.sign(x - theta)
        errors_pos = (positive_pred != y).sum()   # s = +1
        errors_neg = (-positive_pred != y).sum()  # s = -1

        # Strict '<' keeps the earliest threshold when error counts tie.
        if min(errors_pos, errors_neg) < best_Ein:
            best_Ein = min(errors_pos, errors_neg)
            theta_ans = theta
            sign_ans = 1 if errors_pos < errors_neg else -1

    return best_Ein / x.shape[0], theta_ans, sign_ans
|
||||||
|
|
||||||
|
|
||||||
|
# Experiment: repeatedly fit a decision stump on a fresh noisy sample, record
# its in-sample and out-of-sample error, then plot Eout against Ein and report
# the median of (Eout - Ein).
NUM_TRIALS = 2000
SAMPLE_SIZE = 8
NOISE_PROB = 0.1

# For x ~ U[-1, 1] and y = sign(x) flipped with probability NOISE_PROB, the
# stump h(x) = s * sign(x - theta) has the closed-form out-of-sample error
#     Eout = 0.5 - k*s + k*s*|theta|,  with k = 0.5 - NOISE_PROB.
# Deriving k from NOISE_PROB keeps the formula in sync with the data generator.
EOUT_SLOPE = 0.5 - NOISE_PROB

Ein_log, Eout_log = [], []
for _ in range(NUM_TRIALS):
    x, y = generate_data(SAMPLE_SIZE, NOISE_PROB)
    Ein, theta, sign = decision_stump(x, y)
    Ein_log.append(Ein)
    Eout_log.append(0.5 - EOUT_SLOPE * sign + EOUT_SLOPE * sign * abs(theta))

# np.median averages the two middle values for an even number of trials, so
# no indices tied to NUM_TRIALS are needed.
gap = np.sort(np.asarray(Eout_log) - np.asarray(Ein_log))
median = np.median(gap)

plt.scatter(Ein_log, Eout_log)
plt.xlabel("Ein")
plt.ylabel("Eout")
plt.title("median: {}".format(median))
# Save before show(): show() blocks until the window is closed.
plt.savefig("hw2_11.png")
plt.show()
|
||||||
BIN
hw2/hw2_12.png
Normal file
BIN
hw2/hw2_12.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 30 KiB |
42
hw2/hw2_12.py
Normal file
42
hw2/hw2_12.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
def generate_data(length, noise_prob, rng=None):
    """Draw a sorted 1-D sample with noisy sign labels.

    Args:
        length: Number of points to draw.
        noise_prob: Each label is flipped when an independent uniform [0, 1)
            draw is <= this value.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) to draw
            from, for reproducible experiments. When omitted, the global
            ``np.random`` state is used, exactly as before.

    Returns:
        Tuple ``(x, y)``: ``x`` holds the points drawn uniformly from
        [-1, 1) in ascending order; ``y`` holds ``sign(x)`` with the selected
        labels negated.
    """
    # The legacy module and Generator objects both expose uniform()/random(),
    # so a single code path serves the default and the injected source.
    source = np.random if rng is None else rng

    x = np.sort(source.uniform(-1, 1, (length,)))
    y = np.sign(x)

    # Draw the flip mask after x so the default path consumes the global
    # random stream in the same order as the original implementation.
    flip = source.random(length) <= noise_prob
    y[flip] *= -1
    return x, y
|
||||||
|
|
||||||
|
def decision_stump(x, y):
    """Evaluate a randomly chosen decision stump on the sample.

    Unlike a fitted stump, no search is performed: the threshold is drawn
    uniformly from [-1, 1) and the direction is +1 or -1 according to the
    sign of a second uniform draw. The hypothesis is
    ``h(x) = s * sign(x - theta)``.

    Args:
        x: 1-D NumPy array of sample points.
        y: 1-D NumPy array of labels aligned with ``x``.

    Returns:
        Tuple ``(Ein, theta, s)``: the fraction of points the random stump
        misclassifies, its threshold, and its direction (+1 or -1).
    """
    theta_ans = np.random.uniform(-1, 1, 1)[0]
    # The direction must be +/-1. Using the raw boolean from the comparison
    # would turn the "negative" stump into s = 0, i.e. an all-zero prediction.
    sign_ans = 1 if np.random.uniform(-1, 1, 1)[0] > 0 else -1

    # Broadcasting subtracts the scalar threshold from every sample.
    h_of_x = sign_ans * np.sign(x - theta_ans)
    Ein = (h_of_x != y).sum()

    return Ein / x.shape[0], theta_ans, sign_ans
|
||||||
|
|
||||||
|
|
||||||
|
# Experiment: repeatedly evaluate a stump returned by decision_stump on a
# fresh noisy sample, record its in-sample and out-of-sample error, then plot
# Eout against Ein and report the median of (Eout - Ein).
NUM_TRIALS = 2000
SAMPLE_SIZE = 8
NOISE_PROB = 0.1

# For x ~ U[-1, 1] and y = sign(x) flipped with probability NOISE_PROB, the
# stump h(x) = s * sign(x - theta) has the closed-form out-of-sample error
#     Eout = 0.5 - k*s + k*s*|theta|,  with k = 0.5 - NOISE_PROB.
# Deriving k from NOISE_PROB keeps the formula in sync with the data generator.
EOUT_SLOPE = 0.5 - NOISE_PROB

Ein_log, Eout_log = [], []
for _ in range(NUM_TRIALS):
    x, y = generate_data(SAMPLE_SIZE, NOISE_PROB)
    Ein, theta, sign = decision_stump(x, y)
    Ein_log.append(Ein)
    Eout_log.append(0.5 - EOUT_SLOPE * sign + EOUT_SLOPE * sign * abs(theta))

# np.median averages the two middle values for an even number of trials, so
# no indices tied to NUM_TRIALS are needed.
gap = np.sort(np.asarray(Eout_log) - np.asarray(Ein_log))
median = np.median(gap)

plt.scatter(Ein_log, Eout_log)
plt.xlabel("Ein")
plt.ylabel("Eout")
plt.title("median: {}".format(median))
# Save before show(): show() blocks until the window is closed.
plt.savefig("hw2_12.png")
plt.show()
|
||||||
Loading…
Reference in New Issue
Block a user