-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathocr.cpp
More file actions
462 lines (409 loc) · 12 KB
/
ocr.cpp
File metadata and controls
462 lines (409 loc) · 12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
/*A character recognizer that uses neural nets
TODO: JC T and Alma Sanchez, 10/2014
assignment and helper code by Michael Black, 10/2014
TODO:
YOUR CODE WILL GO IN FUNCTIONS test() AND train()
HERE STATE WHAT STEPS YOU ACCOMPLISHED
usage:
ocr sample X
pops up a window, user draws an example of an X, user doubleclicks and the X is saved for later
ocr train
builds a neural net for each letter type, trains each of them on the samples until they predict perfectly
ocr test
pops up a window, user draws a letter and doubleclicks, the program tries to guess which letter was drawn
*/
// trained in this order
//A = angry >=| mouth 2nd row from the bottom
//H = Happy =) one column in from both sides
//S = sad face =( 3 squares in from left and right
//W = weird face 0_o mouth is the 5th row from the bottom
//E = extremely happy face =D 2nd and 3rd rows from left and right, mouth starts 4 rows under the eyes
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <QtGui>
#include <math.h>
#include <sstream>
//graphics defs here
#include "ocr.h"
//neural network is defined here. assumes neural.cpp is in the same directory too.
#include "neural.h"
using namespace std;
//forward declaration: getSquares() is defined further down but used by test() and saveSample()
int* getSquares();
//some global vars:
//number of grid squares across
const int GRIDWIDTH=10;
//number of grid squares down
const int GRIDHEIGHT=20;
//width in pixels of the area the grid is drawn on (the window itself is made 100 pixels wider)
const int SCREENWIDTH=400;
//height in pixels of the area the grid is drawn on (the window itself is made 100 pixels taller)
const int SCREENHEIGHT=400;
//the clickable grid squares, indexed [column][row]; filled in by main() before the window is shown
Square* square[GRIDWIDTH][GRIDHEIGHT];
//the scene that holds all the squares
QGraphicsScene* thescene;
//the window that displays the scene and receives the mouse events
OCRView* view;
//keep track of the command line input
//operation is "sample" "train" or "test"
//symbol is the letter entered when the operation is "sample"; main() sets it to a single space
//when no symbol argument was given
string operation,symbol;
//called immediately on "ocr train"
//reads the images in ocrdata.txt, builds a set of neural nets, trains them, and saves the weights to perceptron.txt
void train(){
//read the images from file ocrdata.txt
ifstream datafile; //file object
string line; //lines will go here
datafile.open("ocrdata.txt"); //open the file
//stop the program if the file isn't found
if (!datafile.is_open()){
cout<<"Couldn't open ocrdata.txt"<<endl;
return;
}
//array of unique characters/faces
char faces[25];
int facecount = 0;
int linecount=0; //keep track of how many samples are in the file
//go through the file and just count the number of samples
while(getline(datafile,line)){
linecount++;
//populate faces with unique characters/faces
bool addtofaces=true;
for (int i=0; i< facecount; i++){
//loop through faces, if line[0] is already in it, don't add
if (line[0] == faces[i])
addtofaces = false;
}
if (addtofaces){
faces[facecount]=line[0];
facecount++;
}
}
/*for (int i = 0; i < facecount; i++)
{
cout << "FACES " << i << " " << faces[i]<< endl;
}*/
datafile.close();
//make an array to hold the samples
int sample_input[linecount][GRIDWIDTH*GRIDHEIGHT];
//make another array to hold the output letter for each sample
char sample_output[linecount];
//reopen the file
datafile.open("ocrdata.txt");
//for each sample,
for(int i=0; i<linecount; i++){
//read it from the file
getline(datafile,line);
//the first character is the output letter
sample_output[i]=line[0];
//then a space, then a 1 or 0 for each square on the screen
for (int j=0; j<GRIDWIDTH*GRIDHEIGHT; j++){
sample_input[i][j]=line[j+2]=='1'?1:0;
}
}
//now we're done with ocrdata.txt
datafile.close();
//TODO: MAKE SOME NEURAL NETS AND TRAIN THEM HERE, THEN SAVE THE WEIGHTS TO perceptron.txt
Perceptron* neurons[facecount];
//create array of perceptrons
for(int i=0; i<facecount; i++){
neurons[i] = new Perceptron(GRIDWIDTH*GRIDHEIGHT);
}
for (int f=0; f<facecount; f++)
{ //Do the training
bool isCorrect=false;
while (!isCorrect)//trains for one letter
{
isCorrect = true;//assume it's true
for (int i = 0; i < linecount; i++)//go through each line
{
if (sample_output[i] == faces[f])//if this line has the letter want
{
// cout<<"HELLO"<<endl;
if (!neurons[f]->train(sample_input[i], 1))//if we get a false, restart
isCorrect = false;
}
else
{
if (!neurons[f]->train(sample_input[i], 0))
isCorrect = false;
}
}
}
}
for (int j = 0; j < facecount; j++)
{ //display the training
cout<<endl;
for (int i = 0; i < linecount; i++)
{
cout << "Neuron "<< faces[j] << "'s' ";
cout << "Prediction for " << sample_output[i] << " is " << neurons[j]->getPrediction(sample_input[i]) << endl;
}
}
ofstream outdatafile;
outdatafile.open("perceptron.txt",ios::out); //ios::out | ios::app
for (int g = 0; g < facecount; g++)
{ //store output weights and hidden weights into a text file
for (int i = 0; i < neurons[g]->size+1; ++i)
{
//line 1 of output file is the output weights
outdatafile << neurons[g]->outputweight[i] << " " ;
}
outdatafile << endl;
for (int i = 0; i < neurons[g]->size; ++i)
{
for (int j = 0; j < neurons[g]->size+1; ++j)
{
//line 2 of output file is hidden weights
outdatafile << neurons[g]->hiddenweight[i][j] << " " ;
}
}
outdatafile << endl;
}
outdatafile.close();
cout << "Wrote sample to perceptron.txt" << endl;
ofstream outputletters;
outputletters.open("perceptronletters.txt",ios::out);
for (int i = 0; i < facecount; i++)
{ // store faces/unique characters array in another text file for use in test.
outputletters << faces[i] << endl;
}
cout << "Wrote faceletters to perceptronletters.txt" << endl;
outputletters.close();
}
//called on "ocr test", after the user draws and double-clicks the mouse
void test(){
//TODO: MAKE SOME NEURAL NETS, READ THE WEIGHTS FROM A FILE perceptron.txt, USE THE NEURAL NETS TO IDENTIFY THE LETTER
ifstream datafile; //file object
string line; //lines will go here
int neuroncount=0; //keep track of how many samples are in the file
datafile.open("perceptron.txt");
if (!datafile.is_open()){
cout<<"Couldn't open perceptron.txt"<<endl;
return;
}
while(getline(datafile,line)){
//go through the file and just count the number of lines
neuroncount++;
}
datafile.close();
//one line for output, one light for hidden, 2 lines for each perceptron
neuroncount = neuroncount/2;
Perceptron* neurons[neuroncount];
datafile.open("perceptron.txt");
for (int k = 0; k < neuroncount; k++)
{ // create a neuron and populate it with the required two lines from the file
neurons[k] = new Perceptron(GRIDWIDTH*GRIDHEIGHT);
getline(datafile,line); // first line is output weights
istringstream iss(line);
float n;
for (int i = 0; i < neurons[k]->size+1; i++)
{
iss >> n;
neurons[k] -> outputweight[i] = n;
}
getline(datafile,line); // second line is hiddenweights
istringstream iss2(line);
for (int i = 0; i < neurons[k]->size; i++)
{
for (int j = 0; j < neurons[k]->size+1; j++)
{
iss2 >> n;
neurons[k] -> hiddenweight[i][j] = n;
}
}
}
datafile.close();
//Need face data to be able to define different perceptrons.
ifstream letterfile;
char faces[25];
int facecount=0;
letterfile.open("perceptronletters.txt");
while(getline(letterfile,line)){
faces[facecount]=line[0];
facecount++;
}
/*for (int i = 0; i < facecount; i++){
cout << "FACE " << i << " " << faces[i] <<endl;
}*/
//store getSquares for multiple use
int* squares = new int[GRIDWIDTH*GRIDHEIGHT];
squares = getSquares();
float* rawPredictions = new float[10];
float guess = 0.0;
int faceGuess = 0;
for (int r = 0; r < facecount; r++){
rawPredictions[r] = neurons[r]->getRawPrediction(squares);
//cout<< "PREDICTION OF "<< r << faces[r] <<" is " << rawPredictions[r] <<endl;
if(rawPredictions[r]>guess){
guess = rawPredictions[r];
faceGuess = r;
}
}
//Make the right guess, chose max out of numbers being printed
cout<< "Is it " << faces[faceGuess] << "?" << endl;
}
//read the contents of the grid and append them to ocrdata.txt as one line:
//the symbol from the command line, a space, then a 1 or 0 for every grid square
void saveSample(){
    ofstream datafile;
    datafile.open("ocrdata.txt",ios::out|ios::app);
    //don't claim success if the file could not be opened
    if (!datafile.is_open()){
        cout << "Couldn't open ocrdata.txt" << endl;
        return;
    }
    datafile << symbol << " ";
    int* s=getSquares();
    for(int i=0; i<GRIDWIDTH*GRIDHEIGHT; i++)
        datafile<<s[i];
    //getSquares() hands back an array allocated with new[]
    delete[] s;
    datafile << endl;
    datafile.close();
    cout << "Wrote sample to ocrdata.txt" << endl;
}
//runs the handler that matches the command line operation; called when the user
//double clicks the window. an operation that is none of the three does nothing.
void doOperation()
{
    if(operation=="sample")
    {
        saveSample();
        return;
    }
    if(operation=="train")
    {
        train();
        return;
    }
    if(operation=="test")
    {
        test();
    }
}
//snapshot of what the user drew: a new[]-allocated array of GRIDWIDTH*GRIDHEIGHT ints,
//1 for a selected square and 0 otherwise, laid out column after column
//(the square at column c, row r is at index c*GRIDHEIGHT+r)
int* getSquares()
{
    int* cells=new int[GRIDWIDTH*GRIDHEIGHT];
    //walks the array front to back, which is the same order the two loops visit the squares in
    int* out=cells;
    for(int col=0; col<GRIDWIDTH; col++)
    {
        for(int row=0; row<GRIDHEIGHT; row++)
        {
            if(square[col][row]->selected)
                *out=1;
            else
                *out=0;
            ++out;
        }
    }
    return cells;
}
//builds the grid square at column x, row y; it starts out unselected
Square::Square(int x, int y)
{
    //position in the grid
    xpos=x;
    ypos=y;
    //a fresh square has not been clicked yet
    selected=false;
    //left and right edges in pixels
    xcoor1=x*SCREENWIDTH/GRIDWIDTH;
    xcoor2=(x+1)*SCREENWIDTH/GRIDWIDTH;
    //top and bottom edges in pixels
    ycoor1=y*SCREENHEIGHT/GRIDHEIGHT;
    ycoor2=(y+1)*SCREENHEIGHT/GRIDHEIGHT;
}
//the rectangle this square occupies on the screen, used for rendering
QRectF Square::boundingRect() const
{
    const qreal width=xcoor2-xcoor1;
    const qreal height=ycoor2-ycoor1;
    return QRectF(xcoor1,ycoor1,width,height);
}
//called when the user selects the square
void Square::click()
{
selected=true;
update();
}
//draws the square: purple if the user clicked it, white otherwise
void Square::paint(QPainter *painter, const QStyleOptionGraphicsItem *, QWidget *)
{
    QColor fill;
    if(!selected)
    {
        //untouched squares are white
        fill.setRgb(255,255,255);
    }
    else
    {
        //clicked squares are purple (red 100, green 0, blue 100)
        fill.setRgb(100,0,100);
    }
    painter->setBrush(fill);
    painter->drawRect(xcoor1,ycoor1,xcoor2-xcoor1,ycoor2-ycoor1);
}
//creates the view on the given scene
OCRView::OCRView(QGraphicsScene *scene, QWidget* parent)
    : QGraphicsView(scene, parent)
{
    //no mouse button is held down when the view is created
    this->isPressed=false;
}
//the mouse has been pressed or dragged
//figure out which grid square is under the pointer and select it;
//positions outside the SCREENWIDTH x SCREENHEIGHT grid area are ignored
void squareClickEvent(QMouseEvent *event)
{
    const int px=event->x();
    const int py=event->y();
    //check the pixel position before converting it: integer division rounds toward zero,
    //so a slightly negative position (pointer dragged past the left or top edge) would
    //otherwise land on column or row 0
    if(px<0 || py<0 || px>=SCREENWIDTH || py>=SCREENHEIGHT) return;
    const int x=px*GRIDWIDTH/SCREENWIDTH;
    const int y=py*GRIDHEIGHT/SCREENHEIGHT;
    square[x][y]->click();
}
//dragging with the button held down selects every square the pointer passes over
void OCRView::mouseMoveEvent(QMouseEvent *event)
{
    if(isPressed)
    {
        squareClickEvent(event);
    }
}
//pressing the mouse starts a drag and selects the square under the pointer
void OCRView::mousePressEvent(QMouseEvent *event)
{
    this->isPressed=true;
    squareClickEvent(event);
}
//letting go of the mouse ends the drag, so moving no longer selects squares
void OCRView::mouseReleaseEvent(QMouseEvent *event)
{
    //the event itself carries nothing we need
    (void)event;
    this->isPressed=false;
}
//a double click ends the drawing session
void OCRView::mouseDoubleClickEvent(QMouseEvent *event)
{
    switch(event->button())
    {
    case Qt::LeftButton:
        //left button: save the sample or run the test (whatever the command line asked for), then quit
        doOperation();
        exit(0);
        break;
    case Qt::RightButton:
        //right button: quit without doing anything
        exit(0);
        break;
    default:
        //any other button is ignored
        break;
    }
}
//program starts here
//argv[1] is the operation ("sample", "train" or "test"); "sample" also needs argv[2],
//the one-character symbol being drawn
//prints the usage text and stops when the arguments are missing or not understood
int main(int argc, char **argv)
{
    //save the command line arguments
    if(argc>=2)
        operation=argv[1];
    if(argc==3)
        symbol=argv[2];
    else
        symbol=" ";

    const bool isSample=operation=="sample";
    const bool isTrain=operation=="train";
    const bool isTest=operation=="test";
    //a saved sample line is the symbol, a space, then the squares, and train() takes only the
    //first character as the label, so "sample" needs exactly one single-character symbol
    const bool badSymbol=isSample && (argc!=3 || symbol.size()!=1);

    //if the user didn't give any arguments, or gave ones we can't use, print out some help and stop
    if(argc==1 || !(isSample || isTrain || isTest) || badSymbol)
    {
        cout << "Usage: " << endl;
        cout << " ocr sample A" << endl;
        cout << " ocr train" << endl;
        cout << " ocr test" << endl;
        //asking for help is not an error; unusable arguments are
        return argc==1 ? 0 : 1;
    }

    //seed the random number generator
    qsrand(QTime(0,0,0).secsTo(QTime::currentTime()));

    //we don't need to make a window if the user selects "train"
    if(isTrain)
    {
        train();
        return 0;
    }

    //make a window
    QApplication app(argc,argv);
    thescene=new QGraphicsScene();
    thescene->setSceneRect(0,0,SCREENWIDTH+100,SCREENHEIGHT+100);

    //add in all the squares
    for(int i=0; i<GRIDWIDTH; i++)
    {
        for(int j=0; j<GRIDHEIGHT; j++)
        {
            square[i][j] = new Square(i,j);
            thescene->addItem(square[i][j]);
        }
    }

    view=new OCRView(thescene);
    view->setWindowTitle("Neural OCR");
    view->resize(SCREENWIDTH+100,SCREENHEIGHT+100);
    view->show();
    view->setMouseTracking(true);

    //hand control to Qt; the program ends from the double click handler
    return app.exec();
}