NTU_HTML/hw4/hw4_10.py
2023-11-15 20:46:06 +08:00

91 lines
2.4 KiB
Python

import numpy as np
from liblinear.liblinearutil import *
import math
# Path of the training data file that the script passes to read_data().
FILENAME = "hw4_train.dat"
def read_data(filename):
    '''
    Read a whitespace-separated numeric data file.

    Every non-blank line is parsed as a sequence of floats: the last number
    is the label (converted to int) and all preceding numbers are the
    features of that example.

    filename: path of the file to read.
    Returns a tuple (x, y): x is a list of feature lists (floats) and y is
    the list of integer labels, in file order.
    Raises ValueError if a token cannot be parsed as a float.
    '''
    x, y = [], []
    with open(filename) as fp:
        # Iterate the file lazily instead of loading every line up front.
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # A blank or whitespace-only line (e.g. a trailing empty
                # line) has no label; indexing numbers[-1] would raise.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y
def form(features):
    '''
    Convert dense feature rows to the LIBSVM-style sparse format.

    Each row becomes a dict mapping the 1-based position of a value to that
    value; entries equal to zero are left out.

    features: iterable of dense feature rows (sequences of numbers).
    Returns a list with one dict per input row.
    '''
    return [
        {
            position: entry
            for position, entry in enumerate(row, start=1)
            if entry != 0.0
        }
        for row in features
    ]
def error(gt, pred):
    '''
    Compute the 0/1 error of predictions against the ground truth.

    gt: sequence of ground-truth labels.
    pred: sequence of predicted labels, indexed at the same positions as gt.
    Returns the fraction of positions in gt whose prediction differs.
    '''
    total = len(gt)
    # Index pred by position so every ground-truth entry is checked.
    mismatches = sum(1 for pos in range(total) if gt[pos] != pred[pos])
    return mismatches / total
def transform(features):
    '''
    Expand each feature vector into its full 3rd-order polynomial features.

    For an input row (x_0, ..., x_{n-1}) the output row contains, in order:
      * the constant term 1,
      * the 1st-order terms x_i,
      * the 2nd-order terms x_i * x_j for i <= j,
      * the 3rd-order terms x_i * x_j * x_k for i <= j <= k.
    With 6 input features this yields 1 + 6 + 21 + 56 = 84 values.

    features: iterable of feature rows (indexable sequences of numbers).
    Returns a list with one expanded row (list) per input row.
    '''
    output_features = []
    for feature in features:
        n = len(feature)
        # Constant (bias) term first.
        row = [1]
        # 1st-order terms.
        row.extend(feature)
        # 2nd-order terms.
        row.extend(
            feature[i] * feature[j]
            for i in range(n)
            for j in range(i, n)
        )
        # 3rd-order terms: products of the feature values themselves, not
        # of the loop indices.
        row.extend(
            feature[i] * feature[j] * feature[k]
            for i in range(n)
            for j in range(i, n)
            for k in range(j, n)
        )
        output_features.append(row)
    return output_features
# Load the training set, expand it with transform(), and convert the result
# to the sparse format produced by form() before building the problem.
x, y = read_data(FILENAME)
x = form(transform(x))
prob = problem(y, x)

# Candidate values of log_10(lambda), in increasing order.
lambda_powers = [-6, -4, -2, 0, 2]
results = []
for lambda_power in lambda_powers:
    lambda_value = 10 ** lambda_power
    # The regularization strength lambda is converted to liblinear's C here.
    param_C = 1/(2*lambda_value)
    # NOTE(review): per the LIBLINEAR README, -s 0 selects L2-regularized
    # logistic regression, -e sets the stopping tolerance and -q silences
    # training output -- confirm against the installed version.
    param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
    model = train(prob, param)

    # Evaluate on the same data the model was trained on.
    p_label, p_acc, p_val = predict(y, x, model)
    err = error(y, p_label)
    print("0/1 error: ", err)
    print()
    results.append({'lambda': lambda_power, 'error': err})

# Pick the entry with the smallest error; because the candidates are visited
# in increasing order and ties replace the current best, the largest lambda
# wins among equal errors.
ans, min_err = None, 1
for i in results:
    current_error = i['error']
    print(current_error)
    if current_error > min_err:
        continue
    min_err, ans = current_error, i
print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))