###
# recognize_9.py
#
# author: Kristina Striegnitz
#
# version: 3/4/2010
#
# Trains a neural network to recognize whether a handwritten digit is
# a 9 or not. I.e., the input is a numeric representation of an image
# (see optdigits.names for more info) and the output is 0 (= "not a
# 9") or 1 (= "yes, it's a 9").
#
###

from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised import BackpropTrainer


def normalize_inputs(inputs):
    """
    Scales raw input values by 1/16 and returns them as a new list of
    floats. The caller uses this to bring the image attributes into the
    range between 0 and 1.
    """
    return [float(value) / 16 for value in inputs]


def make_dataset(filename, size):
    """
    Creates a set of training or testing data from a file.

    Each line in the file is one example. The first 64 numbers on the
    line are inputs representing an image of a digit, the last number
    says which digit it is. This function creates a dataset which only
    maps inputs to either 1 or 0, where 1 means 'this digit is a 9' and
    0 means 'this image is another digit not equal to 9'.

    This function also takes a size parameter which indicates the
    maximum size of the dataset. That is, if the file has more examples
    than the given number, we don't use the whole file; at most `size`
    examples are loaded. Blank lines in the file are skipped and do not
    count towards `size`.

    Returns the populated SupervisedDataSet (64 inputs, 1 output).
    Raises ValueError if a non-blank line contains a field that is not
    an integer.
    """
    data = SupervisedDataSet(64, 1)
    loaded = 0

    # The with-block guarantees the file is closed even when a malformed
    # line makes int() raise.
    with open(filename, "r") as fobj:
        for line in fobj:
            # Stop once `size` examples have been collected (the previous
            # counter started at 1 and therefore stopped one example early).
            if loaded >= size:
                break

            stripped = line.strip()
            if not stripped:
                # E.g. a trailing empty line at the end of the file.
                continue

            attributes = [int(field) for field in stripped.split(",")]

            # Normalize the input values to be between 0 and 1.
            inputs = normalize_inputs(attributes[:-1])

            # The last attribute is the digit label; collapse it to 9 / not 9.
            outputs = [1] if attributes[-1] == 9 else [0]

            data.addSample(inputs, outputs)
            loaded += 1

    return data


def training(d):
    """
    Builds a network and trains it.

    The network has d.indim inputs, a hidden layer of size 4 and
    d.outdim outputs. It is trained on the dataset d with
    backpropagation for 100 epochs; a progress message is printed every
    20 epochs.

    Returns the BackpropTrainer (not the bare network), so that the
    caller can use the trainer's testing facilities afterwards.
    """
    n = buildNetwork(d.indim, 4, d.outdim, recurrent=True)
    t = BackpropTrainer(n, d, learningrate=0.3, momentum=0.3, verbose=True)
    for epoch in range(0, 100):
        if epoch % 20 == 0:
            # Formatted so the output is the same under Python 2 and 3.
            print("Epoch: %d" % epoch)
        t.train()
    return t


def testing(trained, testdata):
    """
    Tests the trained network on the given test dataset.

    `trained` is the trainer returned by training(); `testdata` is a
    dataset as produced by make_dataset(). The evaluation is run in
    verbose mode, so the trainer reports its results on the console.
    Nothing is returned.
    """
    trained.testOnData(testdata, verbose=True)


def run():
    """
    Use this function to build, train, and test your neural network.

    Trains on up to 2000 examples from "optdigits.tra" and then tests
    on up to 100 examples from "optdigits.tes". Both files are expected
    in the current working directory.
    """
    training_file = "optdigits.tra"
    testing_file = "optdigits.tes"

    trainingdata = make_dataset(training_file, 2000)
    trained = training(trainingdata)

    testdata = make_dataset(testing_file, 100)
    testing(trained, testdata)