66 lines
1.6 KiB
Python
66 lines
1.6 KiB
Python
import numpy as np
|
|
from liblinear.liblinearutil import *
|
|
import math
|
|
|
|
# Path of the training-data file that the script entry point loads.
FILENAME = "hw4_train.dat"
|
|
|
|
def read_data(filename):
    """Load a whitespace-separated numeric data file.

    Every non-blank line is parsed as a row of numbers; the last number
    on the row is the label and everything before it is the feature
    vector.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a list of feature rows (lists
        of floats) and ``y`` is the list of labels converted to ``int``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on a line is not a valid number.
    """
    x, y = [], []
    with open(filename) as fp:
        # Stream the file line by line instead of loading it all at once.
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank lines (e.g. a trailing newline at end of file)
                # carry no sample; indexing numbers[-1] would fail.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y
|
|
|
|
def form(features):
    """Convert dense feature rows to LIBSVM-style sparse dictionaries.

    Args:
        features: Iterable of dense feature rows (sequences of numbers).

    Returns:
        A list with one dict per input row, mapping the 1-based feature
        index to its value. Entries whose value equals ``0.0`` are
        omitted, so an all-zero (or empty) row becomes ``{}``.
    """
    return [
        {
            index: value
            # Sparse indices are 1-based, hence start=1.
            for index, value in enumerate(feature, start=1)
            if value != 0.0
        }
        for feature in features
    ]
|
|
|
|
def error(gt, pred):
    """Return the 0/1 error between ground-truth and predicted labels.

    Args:
        gt: Sequence of ground-truth labels.
        pred: Sequence of predicted labels, aligned with ``gt``.

    Returns:
        The fraction of positions at which ``gt`` and ``pred`` differ,
        as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``gt`` is empty or the two sequences differ in
            length.
    """
    total = len(gt)
    if total == 0:
        # Avoid an opaque ZeroDivisionError on the final division.
        raise ValueError("cannot compute the error of an empty label sequence")
    if len(pred) != total:
        raise ValueError(
            "gt and pred must have the same length "
            "(got {} and {})".format(total, len(pred))
        )
    mismatches = sum(1 for truth, guess in zip(gt, pred) if truth != guess)
    return mismatches / total
|
|
|
|
if __name__ == '__main__':
    # Load the training set and wrap it in a LIBLINEAR problem.
    features, labels = read_data(FILENAME)
    sparse_features = form(features)
    prob = problem(labels, sparse_features)

    # Sweep the regularisation weight lambda over powers of ten, recording
    # the 0/1 error measured on the same data the model was trained on.
    results = []
    for lambda_power in (-6, -4, -2, 0, 2):
        lambda_value = 10 ** lambda_power
        # The cost handed to LIBLINEAR is C = 1 / (2 * lambda).
        param_C = 1/(2*lambda_value)
        # Per the LIBLINEAR docs: -s 0 selects L2-regularised logistic
        # regression, -e sets the stopping tolerance, -q silences training.
        options = '-s 0 -c {} -e 0.000001 -q'.format(param_C)
        model = train(prob, parameter(options))

        # Only the predicted labels are needed; accuracy and decision
        # values returned by predict() are ignored.
        predicted_labels = predict(labels, sparse_features, model)[0]
        err = error(labels, predicted_labels)

        print("0/1 error: ", err)
        print()
        results.append({'lambda': lambda_power, 'error': err})

    # Pick the entry with the smallest error. Entries are visited in
    # increasing lambda order and ties replace the current best, so the
    # largest lambda among equally good ones wins.
    ans, min_err = None, 1
    for entry in results:
        print(entry['error'])
        if entry['error'] > min_err:
            continue
        ans, min_err = entry, entry['error']

    print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
|
|
|
|
|
|
|
|
|