"""Regularized logistic regression on a 3rd-order polynomial feature transform.

The script reads ``hw4_train.dat``, expands every sample with a full
polynomial transform up to degree three, trains one LIBLINEAR model per
candidate regularization strength ``lambda = 10**p`` and reports the
in-sample 0/1 error of each, followed by the best ``lambda``.
"""
import itertools
import math

import numpy as np
from liblinear.liblinearutil import *

FILENAME = "hw4_train.dat"


def read_data(filename):
    """Load a whitespace-separated data file.

    Every non-blank line holds the feature values followed by the label in
    the last column.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(x, y)``: ``x`` is a list of feature lists (floats) and
        ``y`` is the list of labels converted to ``int``.
    """
    x, y = [], []
    with open(filename) as fp:
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank line (for example a trailing newline at end of file);
                # indexing the last column would raise IndexError.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y


def form(features):
    """Convert dense feature rows to the sparse LIBSVM/LIBLINEAR dict format.

    Args:
        features: Iterable of dense feature sequences.

    Returns:
        A list with one dict per row that maps the 1-based feature index to
        its value; entries equal to zero are omitted.
    """
    return [
        {
            index: value
            for index, value in enumerate(feature, start=1)
            if value != 0.0
        }
        for feature in features
    ]


def error(gt, pred):
    """Return the 0/1 error, i.e. the fraction of mismatching labels.

    Args:
        gt: Sequence of ground-truth labels.
        pred: Sequence of predicted labels, same length as ``gt``.

    Returns:
        Number of positions where ``gt`` and ``pred`` differ, divided by
        ``len(gt)``.

    Raises:
        ValueError: If ``gt`` is empty or the two sequences differ in length.
    """
    if len(gt) != len(pred):
        raise ValueError(
            "gt and pred must have the same length ({} != {})".format(
                len(gt), len(pred)
            )
        )
    if len(gt) == 0:
        raise ValueError("cannot compute the error of an empty label list")
    mistakes = sum(1 for truth, guess in zip(gt, pred) if truth != guess)
    return mistakes / len(gt)


def transform(features):
    """Apply the full polynomial transform of degree three to every sample.

    Each output row consists of, in order: the constant term ``1``, the raw
    features, all products ``x_i * x_j`` with ``i <= j`` and all products
    ``x_i * x_j * x_k`` with ``i <= j <= k``.  For six raw features this
    yields ``1 + 6 + 21 + 56 = 84`` values per row; the row length is derived
    from the input dimension instead of being fixed at 84.

    Args:
        features: Iterable of raw feature sequences.

    Returns:
        A list of transformed feature lists, one per input sample.
    """
    transformed = []
    for feature in features:
        row = [1.0]
        # 1st-order terms
        row.extend(feature)
        # 2nd-order terms; combinations_with_replacement enumerates the pairs
        # in the same order as nested loops with j starting at i.
        row.extend(
            a * b
            for a, b in itertools.combinations_with_replacement(feature, 2)
        )
        # 3rd-order terms: products of the feature VALUES (multiplying the
        # loop indices instead would give the same constants for every sample).
        row.extend(
            a * b * c
            for a, b, c in itertools.combinations_with_replacement(feature, 3)
        )
        transformed.append(row)
    return transformed


x, y = read_data(FILENAME)
x = transform(x)
x = form(x)
prob = problem(y, x)

# Candidate values of log10(lambda), in increasing order.
lambda_powers = [-6, -4, -2, 0, 2]
results = []
for lambda_power in lambda_powers:
    lambda_value = 10 ** lambda_power
    # The regularization strength lambda is passed to LIBLINEAR through its
    # cost parameter as C = 1 / (2 * lambda).
    param_C = 1 / (2 * lambda_value)
    param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
    model = train(prob, param)
    # Evaluate on the training data itself (in-sample error).
    p_label, p_acc, p_val = predict(y, x, model)
    err = error(y, p_label)
    print("0/1 error: ", err)
    print()
    results.append({'lambda': lambda_power, 'error': err})

# Pick the lambda with the smallest error.  The candidates are visited in
# increasing order and "<=" is used, so ties go to the largest lambda.
ans, min_err = None, 1
for result in results:
    print(result['error'])
    if result['error'] <= min_err:
        min_err = result['error']
        ans = result
print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))