feat: complete hw4

This commit is contained in:
snsd0805 2023-11-14 02:26:08 +08:00
parent b3ddd2d11e
commit ab0f64d364
Signed by: snsd0805
GPG Key ID: 569349933C77A854
3 changed files with 251 additions and 0 deletions

63
hw4/hw4_10.py Normal file
View File

@ -0,0 +1,63 @@
import numpy as np
from liblinear.liblinearutil import *
import math
# Path of the data file that the script below loads through read_data().
FILENAME = "hw4_train.dat"
def read_data(filename):
    '''
    Load a whitespace-separated numeric data file.

    Every non-blank line holds the feature values followed by the label
    in the last column.

    Args:
        filename: path of the text file to read.

    Returns:
        (x, y) where x is a list of feature rows (lists of float) and
        y is the list of labels converted to int.

    Raises:
        ValueError: if a token cannot be parsed as a number.
    '''
    x, y = [], []
    with open(filename) as fp:
        # Iterate the file directly instead of materialising every line.
        for line in fp:
            numbers = [float(token) for token in line.split()]
            # A blank line (for example a trailing newline at the end of the
            # file) produces no numbers; skip it rather than failing on
            # numbers[-1].
            if not numbers:
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y
def format(features):
    '''
    change to LIBSVM format

    Each dense feature row becomes a dict that maps the 1-based column
    index to its value; entries equal to zero are left out.
    '''
    return [
        {position: entry
         for position, entry in enumerate(row, start=1)
         if entry != 0.0}
        for row in features
    ]
def error(gt, pred):
    '''
    Return the fraction of positions at which pred differs from gt
    (the 0/1 error).
    '''
    mismatches = sum(
        1 for position in range(len(gt)) if gt[position] != pred[position]
    )
    return mismatches / len(gt)
# Load the data once and convert the features to sparse dictionaries.
x, y = read_data(FILENAME)
x = format(x)
prob = problem(y, x)

# Candidate values of log_10(lambda), in ascending order.
lambda_powers = [-6, -4, -2, 0, 2]
results = []
for power in lambda_powers:
    # The regularisation strength lambda is passed to LIBLINEAR as the
    # cost C = 1 / (2 * lambda).
    strength = 10 ** power
    cost = 1/(2*strength)
    options = parameter('-s 0 -c {} -e 0.000001 -q'.format(cost))
    fitted = train(prob, options)

    # Predictions are made on the same examples the model was trained on.
    predicted, _accuracy, _values = predict(y, x, fitted)
    err = error(y, predicted)
    print("0/1 error: ", err)
    print()
    results.append({'lambda': power, 'error': err})

# Pick the entry with the lowest error.  The comparison is "<=", so on a tie
# the later entry wins; because lambda_powers is ascending that is the
# largest lambda.
ans = None
min_err = 1
for candidate in results:
    if not candidate['error'] <= min_err:
        continue
    min_err = candidate['error']
    ans = candidate
print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))

83
hw4/hw4_11.py Normal file
View File

@ -0,0 +1,83 @@
import numpy as np
import datetime
import random
from liblinear.liblinearutil import *
import matplotlib.pyplot as plt
# Path of the data file that the script below loads through read_data().
FILENAME = "hw4_train.dat"
def read_data(filename):
    '''
    Read a text file of whitespace-separated numbers, one example per line.

    The last number on a line is the label; everything before it is the
    feature vector.

    Args:
        filename: path of the text file to read.

    Returns:
        (x, y) where x is a list of feature rows (lists of float) and
        y is the list of labels converted to int.

    Raises:
        ValueError: if a token cannot be parsed as a number.
    '''
    x, y = [], []
    with open(filename) as fp:
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank lines (such as a trailing newline) carry no example;
                # indexing numbers[-1] on them would raise IndexError.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y
def format(features):
    '''
    change to LIBSVM format

    Builds one dict per feature row, keyed by the 1-based column index,
    keeping only the non-zero values.
    '''
    sparse_rows = []
    for row in features:
        sparse_rows.append({
            column: value
            for column, value in enumerate(row, start=1)
            if value != 0.0
        })
    return sparse_rows
def error(gt, pred):
    '''
    Compute the 0/1 error: the share of entries of gt whose prediction in
    pred (at the same position) is different.
    '''
    wrong = 0
    for position, truth in enumerate(gt):
        if truth != pred[position]:
            wrong += 1
    return wrong / len(gt)
def new_split(x, y, train_size=120):
    '''
    Randomly split the examples into a training part and a validation part.

    Args:
        x: sequence of feature rows.
        y: sequence of labels, aligned with x.
        train_size: number of shuffled examples that go to the training
            part (default 120, the value that used to be hard-coded); the
            remaining examples form the validation part.

    Returns:
        ((train_x, train_y), (val_x, val_y)), each element being a tuple.

    Raises:
        ValueError: if train_size is negative.
    '''
    if train_size < 0:
        raise ValueError("train_size must not be negative")
    # The global random generator is re-seeded from the current time on
    # every call, exactly as before, so each call draws a new permutation.
    random.seed(datetime.datetime.now().timestamp())
    data = list(zip(x, y))
    if not data:
        # zip(*[]) has nothing to unpack; an empty data set simply yields
        # two empty parts.
        return ((), ()), ((), ())
    # Shuffle features and labels together so the pairs stay aligned.
    random.shuffle(data)
    shuffled_x, shuffled_y = zip(*data)
    train_part = (shuffled_x[:train_size], shuffled_y[:train_size])
    val_part = (shuffled_x[train_size:], shuffled_y[train_size:])
    return train_part, val_part
# Load the data once and convert the features to sparse dictionaries.
x, y = read_data(FILENAME)
x = format(x)

# Candidate values of log_10(lambda), in ascending order.
lambda_powers = [-6, -4, -2, 0, 2]
log_lambda = []
for _ in range(128):
    # Each repetition draws a fresh random train/validation split.
    (train_x, train_y), (val_x, val_y) = new_split(x, y)
    prob = problem(train_y, train_x)

    results = []
    for power in lambda_powers:
        # The regularisation strength lambda is passed to LIBLINEAR as the
        # cost C = 1 / (2 * lambda).
        strength = 10 ** power
        cost = 1/(2*strength)
        options = parameter('-s 0 -c {} -e 0.000001 -q'.format(cost))
        fitted = train(prob, options)

        # The error is measured on the held-out validation part.
        predicted, _accuracy, _values = predict(val_y, val_x, fitted)
        err = error(val_y, predicted)
        print("0/1 error: ", err)
        print()
        results.append({'lambda': power, 'error': err})

    # "<=" lets a later entry win ties; with ascending lambda_powers that is
    # the largest lambda among the best ones.
    ans = None
    min_err = 1
    for candidate in results:
        if not candidate['error'] <= min_err:
            continue
        min_err = candidate['error']
        ans = candidate
    print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
    print()
    log_lambda.append(ans['lambda'])

# Histogram of the selected log_10(lambda) over all repetitions.
plt.hist(log_lambda)
plt.savefig("hw4_11.png")

105
hw4/hw4_12.py Normal file
View File

@ -0,0 +1,105 @@
import numpy as np
import datetime
import random
from liblinear.liblinearutil import *
import matplotlib.pyplot as plt
# Path of the data file that the script below loads through read_data().
FILENAME = "hw4_train.dat"
def read_data(filename):
    '''
    Parse a data file in which each line lists the feature values of one
    example followed by its label, separated by whitespace.

    Args:
        filename: path of the text file to read.

    Returns:
        (x, y) where x is a list of feature rows (lists of float) and
        y is the list of labels converted to int.

    Raises:
        ValueError: if a token cannot be parsed as a number.
    '''
    x, y = [], []
    with open(filename) as fp:
        for line in fp:
            tokens = line.split()
            # Lines without any token (blank lines, trailing newline) are
            # ignored; taking the last number of such a line would raise
            # IndexError.
            if not tokens:
                continue
            numbers = [float(token) for token in tokens]
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y
def format(features):
    '''
    change to LIBSVM format

    Returns a list with one dict per input row; a dict maps the 1-based
    position of every non-zero value to that value.
    '''
    def to_sparse(row):
        # Zero entries are omitted from the sparse representation.
        return {col + 1: val for col, val in enumerate(row) if val != 0.0}

    return [to_sparse(row) for row in features]
def error(gt, pred):
    '''
    0/1 error between ground truth gt and predictions pred: the number of
    positions where they disagree divided by len(gt).
    '''
    total = len(gt)
    misses = [k for k in range(total) if gt[k] != pred[k]]
    return len(misses) / total
def new_split(x, y, fold_size=40):
    '''
    Shuffle the examples and cut them into consecutive folds.

    Args:
        x: sequence of feature rows.
        y: sequence of labels, aligned with x.
        fold_size: number of examples per fold (default 40, the value that
            used to be hard-coded).  The last fold is shorter when the
            number of examples is not a multiple of fold_size.

    Returns:
        A list of (fold_x, fold_y) pairs, each element being a tuple.
        An empty data set gives an empty list.

    Raises:
        ValueError: if fold_size is not positive.
    '''
    if fold_size <= 0:
        raise ValueError("fold_size must be positive")
    # The global random generator is re-seeded from the current time on
    # every call, exactly as before, so each call draws a new permutation.
    random.seed(datetime.datetime.now().timestamp())
    data = list(zip(x, y))
    if not data:
        # zip(*[]) has nothing to unpack; no examples means no folds.
        return []
    # Shuffle features and labels together so the pairs stay aligned.
    random.shuffle(data)
    shuffled_x, shuffled_y = zip(*data)
    return [
        (shuffled_x[start:start + fold_size], shuffled_y[start:start + fold_size])
        for start in range(0, len(shuffled_x), fold_size)
    ]
# Load the data once and convert the features to sparse dictionaries.
x, y = read_data(FILENAME)
x = format(x)

log_lambda = []
# Candidate values of log_10(lambda), in ascending order.
lambda_powers = [-6, -4, -2, 0, 2]
for _ in range(128):
    # Each repetition reshuffles the data into new folds.
    folds = new_split(x, y)
    # Running sum of validation errors, one slot per candidate lambda.
    errors = [0] * len(lambda_powers)

    for held_out, (val_x, val_y) in enumerate(folds):
        # Everything except the held-out fold forms the training set.
        train_x, train_y = [], []
        for position, (fold_x, fold_y) in enumerate(folds):
            if position != held_out:
                train_x.extend(fold_x)
                train_y.extend(fold_y)
        prob = problem(train_y, train_x)

        for slot, power in enumerate(lambda_powers):
            # The regularisation strength lambda is passed to LIBLINEAR as
            # the cost C = 1 / (2 * lambda).
            strength = 10 ** power
            cost = 1/(2*strength)
            options = parameter('-s 0 -c {} -e 0.000001 -q'.format(cost))
            fitted = train(prob, options)
            predicted, _accuracy, _values = predict(val_y, val_x, fitted)
            errors[slot] += error(val_y, predicted)

    # Average the accumulated error of every candidate over the folds.
    results = [
        {'lambda': power, 'error': total/len(folds)}
        for power, total in zip(lambda_powers, errors)
    ]

    # "<=" lets a later entry win ties; with ascending lambda_powers that is
    # the largest lambda among the best ones.
    ans = None
    min_err = 1
    for candidate in results:
        if not candidate['error'] <= min_err:
            continue
        min_err = candidate['error']
        ans = candidate
    print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
    print()
    log_lambda.append(ans['lambda'])

# Histogram of the selected log_10(lambda) over all repetitions.
plt.hist(log_lambda)
plt.savefig("hw4_12.png")