import math

import numpy as np
from liblinear.liblinearutil import *

FILENAME = "hw4_train.dat"


def read_data(filename):
    """Load a whitespace-separated numeric data file.

    Every non-blank line holds the feature values followed by the label in
    the last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A pair ``(x, y)``: ``x`` is a list of feature lists (floats) and
        ``y`` is the list of labels converted to ``int``.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    x, y = [], []
    with open(filename) as fp:
        # Stream the file instead of materialising every line up front.
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank line (e.g. a trailing newline at end of file):
                # there is no label column to read, so skip it.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y


# NOTE: this name shadows the builtin ``format`` inside this module; it is
# kept because it is the name the rest of the script calls.
def format(features):
    """Convert dense feature rows to sparse ``{index: value}`` dicts.

    Indices are 1-based and entries equal to zero are omitted, which is the
    sparse representation used by LIBSVM/liblinear.

    Args:
        features: Iterable of rows, each an iterable of numeric values.

    Returns:
        A list with one dict per row mapping 1-based column index to the
        non-zero value at that column.
    """
    return [
        {index: value for index, value in enumerate(feature, start=1)
         if value != 0.0}
        for feature in features
    ]


def error(gt, pred):
    """Return the 0/1 error: the fraction of positions where labels differ.

    Args:
        gt: Sequence of ground-truth labels.
        pred: Sequence of predicted labels, at least as long as ``gt``.

    Returns:
        ``mismatches / len(gt)`` as a float, or ``0.0`` when ``gt`` is empty
        (instead of failing with a division by zero).

    Raises:
        IndexError: If ``pred`` is shorter than ``gt``.
    """
    if not gt:
        return 0.0
    mismatches = sum(
        1 for index, truth in enumerate(gt) if truth != pred[index]
    )
    return mismatches / len(gt)


# ---------------------------------------------------------------------------
# Script: train one model per regularisation strength and report the lambda
# with the lowest 0/1 error measured on the training data itself.
# ---------------------------------------------------------------------------
x, y = read_data(FILENAME)
x = format(x)
prob = problem(y, x)

# Candidate values of log_10(lambda), in ascending order.
lambda_powers = [-6, -4, -2, 0, 2]
results = []
for lambda_power in lambda_powers:
    lambda_value = 10 ** lambda_power
    # liblinear takes a cost C rather than lambda; the script maps between
    # them with C = 1 / (2 * lambda).
    param_C = 1 / (2 * lambda_value)
    # Option meanings per the liblinear README: -s solver type, -c cost,
    # -e stopping tolerance, -q quiet mode.
    param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
    model = train(prob, param)
    # Predictions are made on the same data the model was trained on.
    p_label, p_acc, p_val = predict(y, x, model)
    err = error(y, p_label)
    print("0/1 error: ", err)
    print()
    results.append({'lambda': lambda_power, 'error': err})

# ``<=`` lets a later entry win ties; since lambda_powers is ascending, the
# largest lambda among those with minimal error is selected.
ans, min_err = None, 1
for i in results:
    if i['error'] <= min_err:
        min_err = i['error']
        ans = i
print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))