"""Decision-stump experiments for homework 2 (problems 10, 11 and 12).

This module consolidates the three scripts hw2/hw2_10.py, hw2/hw2_11.py and
hw2/hw2_12.py, which were identical except for the sample size and the
learning rule:

* problem 10: best-Ein decision stump, 32 samples  -> hw2_10.png
* problem 11: best-Ein decision stump,  8 samples  -> hw2_11.png
* problem 12: randomly drawn stump,     8 samples  -> hw2_12.png

Run ``python <this file>`` to produce all three figures, or pass problem
numbers (``10``, ``11``, ``12``) to run a subset.
"""
import numpy as np


def generate_data(length, noise_prob):
    """Draw a sorted 1-D sample with noisy sign labels.

    Args:
        length: number of points to draw uniformly from [-1, 1).
        noise_prob: probability with which each label is flipped.

    Returns:
        ``(x, y)`` where ``x`` is sorted ascending and ``y`` holds +1.0/-1.0
        labels: the sign of ``x`` with each entry flipped independently.
    """
    x = np.sort(np.random.uniform(-1, 1, (length,)))
    # np.sign would give 0 at x == 0, which is not a valid label.
    y = np.where(x > 0, 1.0, -1.0)
    # Strict '<' so that noise_prob == 0 can never flip a label.
    flip = np.random.rand(length) < noise_prob
    y[flip] *= -1
    return x, y


def _as_sorted_sample(x, y):
    """Return (x, y) as arrays ordered by x; reject empty samples."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y)
    if x.shape[0] == 0:
        raise ValueError("decision stump needs at least one sample")
    order = np.argsort(x, kind="stable")
    return x[order], y[order]


def decision_stump(x, y):
    """Find the stump ``h(x) = s if x > theta else -s`` with the lowest Ein.

    Candidate thresholds are -1 and the midpoints between neighbouring
    samples.  Ties keep the original script's behaviour: the first threshold
    reaching the minimum wins, and ``s = -1`` is chosen unless ``s = +1`` is
    strictly better at that threshold.

    Args:
        x: 1-D sample positions (sorted internally if needed).
        y: labels in {+1, -1}, aligned with ``x``.

    Returns:
        ``(Ein, theta, s)`` with ``Ein`` the in-sample error rate in [0, 1].

    Raises:
        ValueError: if the sample is empty.
    """
    x, y = _as_sorted_sample(x, y)
    n = x.shape[0]
    thetas = np.concatenate(([-1.0], (x[:-1] + x[1:]) / 2))

    # positive[i, j] is the s=+1 prediction of threshold i on sample j.
    positive = np.where(x[None, :] > thetas[:, None], 1, -1)
    err_pos = (positive != y[None, :]).sum(axis=1)
    err_neg = (-positive != y[None, :]).sum(axis=1)
    err_best = np.minimum(err_pos, err_neg)

    # argmin returns the first minimum, matching a strict '<' scan.
    k = int(np.argmin(err_best))
    sign = 1 if err_pos[k] < err_neg[k] else -1
    return err_best[k] / n, thetas[k], sign


def random_stump(x, y):
    """Evaluate a stump whose threshold and direction are drawn at random.

    This is the learner of problem 12 (named ``decision_stump`` in the
    original hw2_12.py).  The direction is drawn from {+1, -1}; the original
    stored a boolean, which made the ``False`` case predict 0 everywhere.

    Args:
        x: 1-D sample positions.
        y: labels in {+1, -1}, aligned with ``x``.

    Returns:
        ``(Ein, theta, s)`` with ``theta`` uniform in [-1, 1) and ``s`` in
        {+1, -1}.

    Raises:
        ValueError: if the sample is empty.
    """
    x, y = _as_sorted_sample(x, y)
    theta = np.random.uniform(-1, 1, 1)[0]
    sign = 1 if np.random.uniform(-1, 1, 1)[0] > 0 else -1
    predicted = np.where(x > theta, sign, -sign)
    return (predicted != y).sum() / x.shape[0], theta, sign


def expected_out_error(theta, sign, noise_prob=0.1):
    """Closed-form Eout of a stump under the ``generate_data`` distribution.

    With the default ``noise_prob`` of 0.1 this equals the original
    expression ``0.5 - 0.4*s + 0.4*s*|theta|``.  Valid for ``|theta| <= 1``.
    """
    return 0.5 + (0.5 - noise_prob) * sign * (abs(theta) - 1)


def run_experiment(sample_size, learner, trials=2000, noise_prob=0.1):
    """Repeat sample -> learn -> score, and summarise the Eout - Ein gap.

    Args:
        sample_size: number of points per generated data set.
        learner: callable ``(x, y) -> (Ein, theta, s)``.
        trials: number of independent repetitions.
        noise_prob: label-flip probability used for data and for Eout.

    Returns:
        ``(Ein_log, Eout_log, median)`` where the logs are arrays of length
        ``trials`` and ``median`` is the median of ``Eout - Ein``.
    """
    ein_log = np.empty(trials)
    eout_log = np.empty(trials)
    for t in range(trials):
        x, y = generate_data(sample_size, noise_prob)
        ein, theta, sign = learner(x, y)
        ein_log[t] = ein
        eout_log[t] = expected_out_error(theta, sign, noise_prob)
    # np.median averages the two middle values for an even count, which is
    # what the hard-coded (gap[999] + gap[1000]) / 2 did for 2000 trials.
    return ein_log, eout_log, float(np.median(eout_log - ein_log))


def plot_experiment(ein_log, eout_log, median, figure_path):
    """Scatter Ein against Eout, save the figure and display it."""
    # Imported lazily so the numeric helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure()
    plt.scatter(ein_log, eout_log)
    plt.xlabel("Ein")
    plt.ylabel("Eout")
    plt.title("median: {}".format(median))
    plt.savefig(figure_path)
    plt.show()


# problem number -> (sample size, learner, output figure)
EXPERIMENTS = {
    "10": (32, decision_stump, "hw2_10.png"),
    "11": (8, decision_stump, "hw2_11.png"),
    "12": (8, random_stump, "hw2_12.png"),
}


def main(argv=None):
    """Run the requested problems (all of them when none are given)."""
    import sys

    requested = list(sys.argv[1:] if argv is None else argv) or list(EXPERIMENTS)
    for key in requested:
        if key not in EXPERIMENTS:
            raise SystemExit("unknown problem: {}".format(key))
        sample_size, learner, figure_path = EXPERIMENTS[key]
        ein_log, eout_log, median = run_experiment(sample_size, learner)
        plot_experiment(ein_log, eout_log, median, figure_path)


if __name__ == "__main__":
    main()