Deep Learning (1): BP Neural Network Principles and Practice

For the underlying theory, see the following lecture notes:

1. Neural Networks

2. Backpropagation

3. Gradient Checking and Advanced Optimization

Reading notes 1 and 2 is enough to sort out the basic working principles of a BP neural network. Below, we practice the algorithm with a C++ implementation.
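
Before reading the code, it helps to fix notation. With input x, hidden activity h, output o, target t, H hidden units, and the logistic sigmoid, the program implements single-hidden-layer backpropagation (weight_i_h is w^{ih}, weight_h_o is w^{ho}, and the bias array stores the hidden biases first, then the output biases):

$$h_j = \sigma\Big(\sum_i x_i\,w^{ih}_{ij} + b_j\Big),\qquad o_k = \sigma\Big(\sum_j h_j\,w^{ho}_{jk} + b_{H+k}\Big),\qquad \sigma(z)=\frac{1}{1+e^{-z}}$$

$$\delta^{o}_k = t_k - o_k,\qquad \delta^{h}_j = h_j\,(1-h_j)\sum_k \delta^{o}_k\,w^{ho}_{jk}$$

$$\Delta w^{ho}_{jk} = \eta\,\frac{\delta^{o}_k\,h_j}{\max(\lVert h\rVert^2,\,0.1)},\qquad \Delta w^{ih}_{ij} = \eta\,\frac{\delta^{h}_j\,x_i}{\max(\lVert x\rVert^2,\,0.1)}$$

Two quirks of this particular listing, visible in backward_pass() below: the output delta omits the usual sigmoid-derivative factor o_k(1 - o_k), and every update is divided by the squared length of the presynaptic activity vector (floored at 0.1), a normalization the reference notes do not use. Biases are updated the same way, with presynaptic activity 1.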

//Backpropagation, 25x25x8 units, binary sigmoid function network
//Written by Thomas Riga, University of Genoa, Italy
//[email protected]

#include <iostream>
#include <fstream>
#include <conio.h>   // getch(), kbhit(): non-standard DOS/Windows console I/O
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <stdio.h>
#include <float.h>   // _control87(), MCW_EM: MSVC/Borland floating-point control
using namespace std;

double **input,
    *hidden,
    **output,
    **target,
    *bias,
    **weight_i_h,
    **weight_h_o,
    *errorsignal_hidden,
    *errorsignal_output;

int input_array_size,
    hidden_array_size,
    output_array_size,
    max_patterns,
    bias_array_size,
    number_of_input_patterns,
    pattern,
    file_loaded = 0,
    ytemp = 0,
    ztemp = 0;
long gaset = -2500;   // RNG seed; must be a long passed by address to bedlam()
double learning_rate,
    max_error_tolerance = 0.1;
char filename[128];
// Constants of the Park-Miller "minimal standard" generator with a
// Bays-Durham shuffle (the ran1 routine from Numerical Recipes)
#define IA   16807
#define IM   2147483647
#define AM   (1.0 / IM)
#define IQ   127773
#define IR   2836
#define NTAB 32
#define NDIV (1+(IM-1) / NTAB)
#define EPS  1.2e-7
#define RNMX (1.0 - EPS)
int compare_output_to_target();
void load_data(char *arg);
void save_data(char *argres);
void forward_pass(int pattern);
void backward_pass(int pattern);
void custom();
void compute_output_pattern();
void get_file_name();
float bedlam(long *idum);
void learn();
void make();
void test();
void print_data();
void print_data_to_screen();
void print_data_to_file();
void output_to_screen();
int getnumber();
void change_learning_rate();
void initialize_net();
void clear_memory();

int main()
{
    cout << "backpropagation network by Thomas Riga, University of Genoa, Italy" << endl;
    for(;;) {
        char choice;
        cout << endl << "1. load data" << endl;
        cout << "2. learn from data" << endl;
        cout << "3. compute output pattern" << endl;
        cout << "4. make new data file" << endl;
        cout << "5. save data" << endl;
        cout << "6. print data" << endl;
        cout << "7. change learning rate" << endl;
        cout << "8. exit" << endl << endl;
        cout << "Enter your choice (1-8)";
        do { choice = getch(); } while (choice != '1' && choice != '2' && choice != '3' && choice != '4' && choice != '5' && choice != '6' && choice != '7' && choice != '8');
        switch(choice) {
        case '1':
            {
                if (file_loaded == 1) clear_memory();
                get_file_name();
                file_loaded = 1;
                load_data(filename);
            }
            break;
        case '2': learn();
            break;
        case '3': compute_output_pattern();
            break;
        case '4': make();
            break;
        case '5':
            {
                if (file_loaded == 0)
                {
                    cout << endl
                        << "there is no data loaded into memory"
                        << endl;
                    break;
                }
                cout << endl << "enter a filename to save data to: ";
                cin >> filename;
                save_data(filename);
            }
            break;
        case '6': print_data();
            break;
        case '7': change_learning_rate();
            break;
        case '8': return 0;
        };
    }
}

void initialize_net()
{
    int x;
    input = new double * [number_of_input_patterns];
    if(!input) { cout << endl << "memory problem!"; exit(1); }
    for(x=0; x<number_of_input_patterns; x++)
    {
        input[x] = new double [input_array_size];
        if(!input[x]) { cout << endl << "memory problem!"; exit(1); }
    }
    hidden = new double [hidden_array_size];
    if(!hidden) { cout << endl << "memory problem!"; exit(1); }
    output = new double * [number_of_input_patterns];
    if(!output) { cout << endl << "memory problem!"; exit(1); }
    for(x=0; x<number_of_input_patterns; x++)
    {
        output[x] = new double [output_array_size];
        if(!output[x]) { cout << endl << "memory problem!"; exit(1); }
    }
    target = new double * [number_of_input_patterns];
    if(!target) { cout << endl << "memory problem!"; exit(1); }
    for(x=0; x<number_of_input_patterns; x++)
    {
        target[x] = new double [output_array_size];
        if(!target[x]) { cout << endl << "memory problem!"; exit(1); }
    }
    bias = new double [bias_array_size];
    if(!bias) { cout << endl << "memory problem!"; exit(1); }
    weight_i_h = new double * [input_array_size];
    if(!weight_i_h) { cout << endl << "memory problem!"; exit(1); }
    for(x=0; x<input_array_size; x++)
    {
        weight_i_h[x] = new double [hidden_array_size];
        if(!weight_i_h[x]) { cout << endl << "memory problem!"; exit(1); }
    }
    weight_h_o = new double * [hidden_array_size];
    if(!weight_h_o) { cout << endl << "memory problem!"; exit(1); }
    for(x=0; x<hidden_array_size; x++)
    {
        weight_h_o[x] = new double [output_array_size];
        if(!weight_h_o[x]) { cout << endl << "memory problem!"; exit(1); }
    }
    errorsignal_hidden = new double [hidden_array_size];
    if(!errorsignal_hidden) { cout << endl << "memory problem!"; exit(1); }
    errorsignal_output = new double [output_array_size];
    if(!errorsignal_output) { cout << endl << "memory problem!"; exit(1); }
    return;
}

void learn()
{
    if (file_loaded == 0)
    {
        cout << endl
            << "there is no data loaded into memory"
            << endl;
        return;
    }
    cout << endl << "learning..." << endl << "press a key to return to menu" << endl;
    register int y;
    while(!kbhit()) {
        for(y=0; y<number_of_input_patterns; y++) {
            forward_pass(y);
            backward_pass(y);
        }
        if(compare_output_to_target()) {
            cout << endl << "learning successful" << endl;
            return;
        }
    }
    cout << endl << "learning not successful yet" << endl;
    return;
}

void load_data(char *arg) {
    int x, y;
    ifstream in(arg);
    if(!in) { cout << endl << "failed to load data file" << endl; file_loaded = 0; return; }
    in >> input_array_size;
    in >> hidden_array_size;
    in >> output_array_size;
    in >> learning_rate;
    in >> number_of_input_patterns;
    bias_array_size = hidden_array_size + output_array_size;
    initialize_net();
    for(x = 0; x < bias_array_size; x++) in >> bias[x];
    for(x=0; x<input_array_size; x++) {
        for(y=0; y<hidden_array_size; y++) in >> weight_i_h[x][y];
    }
    for(x = 0; x < hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) in >> weight_h_o[x][y];
    }
    for(x=0; x < number_of_input_patterns; x++) {
        for(y=0; y<input_array_size; y++) in >> input[x][y];
    }
    for(x=0; x < number_of_input_patterns; x++) {
        for(y=0; y<output_array_size; y++) in >> target[x][y];
    }
    in.close();
    cout << endl << "data loaded" << endl;
    return;
}


void forward_pass(int pattern)
{
    _control87(MCW_EM, MCW_EM);   // mask floating-point exceptions (MSVC/Borland-specific)
    register double temp=0;
    register int x,y;

    // INPUT -> HIDDEN: hidden[y] = sigmoid(sum_x input[x]*w[x][y] + bias[y])
    for(y=0; y<hidden_array_size; y++) {
        for(x=0; x<input_array_size; x++) {
            temp += (input[pattern][x] * weight_i_h[x][y]);
        }
        hidden[y] = (1.0 / (1.0 + exp(-1.0 * (temp + bias[y]))));
        temp = 0;
    }

    // HIDDEN -> OUTPUT: the output biases are stored after the hidden biases in bias[]
    for(y=0; y<output_array_size; y++) {
        for(x=0; x<hidden_array_size; x++) {
            temp += (hidden[x] * weight_h_o[x][y]);
        }
        output[pattern][y] = (1.0 / (1.0 + exp(-1.0 * (temp + bias[y + hidden_array_size]))));
        temp = 0;
    }
    return;
}



void backward_pass(int pattern)
{
    register int x, y;
    register double temp = 0;

    // COMPUTE ERRORSIGNAL FOR OUTPUT UNITS
    // (note: the sigmoid-derivative factor is omitted on the output layer)
    for(x=0; x<output_array_size; x++) {
        errorsignal_output[x] = (target[pattern][x] - output[pattern][x]);
    }

    // COMPUTE ERRORSIGNAL FOR HIDDEN UNITS
    for(x=0; x<hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) {
            temp += (errorsignal_output[y] * weight_h_o[x][y]);
        }
        errorsignal_hidden[x] = hidden[x] * (1-hidden[x]) * temp;
        temp = 0.0;
    }

    // ADJUST WEIGHTS OF CONNECTIONS FROM HIDDEN TO OUTPUT UNITS
    // (each update is normalized by the squared length of the hidden activity)
    double length = 0.0;
    for (x=0; x<hidden_array_size; x++) {
        length += hidden[x]*hidden[x];
    }
    if (length<=0.1) length = 0.1;
    for(x=0; x<hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) {
            weight_h_o[x][y] += (learning_rate * errorsignal_output[y] *
                hidden[x]/length);
        }
    }

    // ADJUST BIASES OF OUTPUT UNITS (stored after the hidden biases in bias[],
    // so errorsignal_output must be indexed relative to hidden_array_size)
    for(x=hidden_array_size; x<bias_array_size; x++) {
        bias[x] += (learning_rate * errorsignal_output[x - hidden_array_size] / length);
    }

    // ADJUST WEIGHTS OF CONNECTIONS FROM INPUT TO HIDDEN UNITS
    length = 0.0;
    for (x=0; x<input_array_size; x++) {
        length += input[pattern][x]*input[pattern][x];
    }
    if (length<=0.1) length = 0.1;
    for(x=0; x<input_array_size; x++) {
        for(y=0; y<hidden_array_size; y++) {
            weight_i_h[x][y] += (learning_rate * errorsignal_hidden[y] *
                input[pattern][x]/length);
        }
    }

    // ADJUST BIASES OF HIDDEN UNITS
    for(x=0; x<hidden_array_size; x++) {
        bias[x] += (learning_rate * errorsignal_hidden[x] / length);
    }
    return;
}

// Returns 1 when every output is within max_error_tolerance of its target;
// ytemp/ztemp cache the last failing pattern so it is re-checked first.
int compare_output_to_target()
{
    register int y,z;
    register double temp, error = 0.0;
    temp = target[ytemp][ztemp] - output[ytemp][ztemp];
    if (temp < 0) error -= temp;
    else error += temp;
    if(error > max_error_tolerance) return 0;
    error = 0.0;
    for(y=0; y < number_of_input_patterns; y++) {
        for(z=0; z < output_array_size; z++) {
            temp = target[y][z] - output[y][z];
            if (temp < 0) error -= temp;
            else error += temp;
            if(error > max_error_tolerance) {
                ytemp = y;
                ztemp = z;
                return 0;
            }
            error = 0.0;
        }
    }
    return 1;
}

void save_data(char *argres) {
    int x, y;
    ofstream out;
    out.open(argres);
    if(!out) { cout << endl << "failed to save file" << endl; return; }
    out << input_array_size << endl;
    out << hidden_array_size << endl;
    out << output_array_size << endl;
    out << learning_rate << endl;
    out << number_of_input_patterns << endl << endl;
    for(x=0; x<bias_array_size; x++) out << bias[x] << ' ';
    out << endl << endl;
    for(x=0; x<input_array_size; x++) {
        for(y=0; y<hidden_array_size; y++) out << weight_i_h[x][y] << ' ';
    }
    out << endl << endl;
    for(x=0; x<hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) out << weight_h_o[x][y] << ' ';
    }
    out << endl << endl;
    for(x=0; x<number_of_input_patterns; x++) {
        for(y=0; y<input_array_size; y++) out << input[x][y] << ' ';
        out << endl;
    }
    out << endl;
    for(x=0; x<number_of_input_patterns; x++) {
        for(y=0; y<output_array_size; y++) out << target[x][y] << ' ';
        out << endl;
    }
    out.close();
    cout << endl << "data saved" << endl;
    return;
}

void make()
{
    int x, y, z;
    double inpx;
    // local sizes only: the new file is written to disk, not loaded into memory
    int in_units, hid_units, out_units, bias_units;
    char makefilename[128];
    cout << endl << "enter name of new data file: ";
    cin >> makefilename;
    ofstream out;
    out.open(makefilename);
    if(!out) { cout << endl << "failed to open file" << endl; return;}
    cout << "how many input units? ";
    cin >> in_units;
    out << in_units << endl;
    cout << "how many hidden units? ";
    cin >> hid_units;
    out << hid_units << endl;
    cout << "how many output units? ";
    cin >> out_units;
    out << out_units << endl;
    bias_units = hid_units + out_units;
    cout << endl << "Learning rate: ";
    cin >> inpx;
    out << inpx << endl;
    cout << endl << "Number of input patterns: ";
    cin >> z;
    out << z << endl << endl;
    // random initial biases and weights in (-1, 1)
    for(x=0; x<bias_units; x++) out << (1.0 - (2.0 * bedlam(&gaset))) << ' ';
    out << endl << endl;
    for(x=0; x<in_units; x++) {
        for(y=0; y<hid_units; y++) out << (1.0 - (2.0 * bedlam(&gaset))) << ' ';
    }
    out << endl << endl;
    for(x=0; x<hid_units; x++) {
        for(y=0; y<out_units; y++) out << (1.0 - (2.0 * bedlam(&gaset))) << ' ';
    }
    out << endl << endl;
    for(x=0; x < z; x++) {
        cout << endl << "input pattern " << (x + 1) << endl;
        for(y=0; y<in_units; y++) {
            cout << (y+1) << ": ";
            cin >> inpx;
            out << inpx << ' ';
        }
        out << endl;
    }
    out << endl;
    for(x=0; x < z; x++) {
        cout << endl << "target output pattern " << (x+1) << endl;
        for(y=0; y<out_units; y++) {
            cout << (y+1) << ": ";
            cin >> inpx;
            out << inpx << ' ';
        }
        out << endl;
    }
    out.close();
    cout << endl << "data saved, to work with this new data file you first have to load it" << endl;
    return;
}

// Park-Miller "minimal standard" RNG with a Bays-Durham shuffle (the ran1
// routine from Numerical Recipes); returns a uniform value in (0, 1).
// Initialize by calling it with *idum set to a negative number.
float bedlam(long *idum)
{
    int xj;
    long xk;
    static long iy=0;
    static long iv[NTAB];
    float temp;

    if(*idum <= 0 || !iy)
    {
        if(-(*idum) < 1)
        {
            *idum = 1 + *idum;
        }
        else
        {
            *idum = -(*idum);
        }
        for(xj = NTAB+7; xj >= 0; xj--)
        {
            xk = (*idum) / IQ;
            *idum = IA * (*idum - xk * IQ) - IR * xk;
            if(*idum < 0)
            {
                *idum += IM;
            }
            if(xj < NTAB)
            {
                iv[xj] = *idum;
            }
        }
        iy = iv[0];
    }

    xk = (*idum) / IQ;
    *idum = IA * (*idum - xk * IQ) - IR * xk;
    if(*idum < 0)
    {
        *idum += IM;
    }
    xj = iy / NDIV;
    iy = iv[xj];
    iv[xj] = *idum;

    if((temp=AM*iy) > RNMX)
    {
        return(RNMX);
    }
    else
    {
        return(temp);
    }
}

void test()
{
    pattern = 0;
    while(pattern == 0) {
        cout << endl << endl << "There are " << number_of_input_patterns << " input patterns in the file," << endl << "enter a number within this range: ";
        pattern = getnumber();
    }
    pattern--;
    forward_pass(pattern);
    output_to_screen();
    return;
}

void output_to_screen()
{
    int x;
    cout << endl << "Output pattern:" << endl;
    for(x=0; x<output_array_size; x++) {
        cout << endl << (x+1) << ": " << output[pattern][x] << "    binary: ";
        if(output[pattern][x] >= 0.9) cout << "1";
        else if(output[pattern][x]<=0.1) cout << "0";
        else cout << "intermediate value";
    }
    cout << endl;
    return;
}

int getnumber()
{
    int a, b = 0;
    char c, d[5];
    while(b<4) {
        do { c = getch(); } while (c != '1' && c != '2' && c != '3' && c != '4' && c != '5' && c != '6' && c != '7' && c != '8' && c != '9' && c != '0' && toascii(c) != 13);
        if(toascii(c)==13) break;
        if(toascii(c)==27) return 0;   // unreachable: the loop above only passes digits and Enter
        d[b] = c;
        cout << c;
        b++;
    }
    d[b] = '\0';
    a = atoi(d);
    if(a < 0 || a > number_of_input_patterns) a = 0;
    return a;
}

void get_file_name()
{
    cout << endl << "enter name of file to load: ";
    cin >> filename;
    return;
}

void print_data()
{
    char choice;
    if (file_loaded == 0)
    {
        cout << endl
            << "there is no data loaded into memory"
            << endl;
        return;
    }
    cout << endl << "1. print data to screen" << endl;
    cout << "2. print data to file" << endl;
    cout << "3. return to main menu" << endl << endl;
    cout << "Enter your choice (1-3)" << endl;
    do { choice = getch(); } while (choice != '1' && choice != '2' && choice != '3');
    switch(choice) {
    case '1': print_data_to_screen();
        break;
    case '2': print_data_to_file();
        break;
    case '3': return;
    };
    return;
}


void print_data_to_screen() {
    register int x, y;
    cout << endl << endl << "DATA FILE: " << filename << endl;
    cout << "learning rate: " << learning_rate << endl;
    cout << "input units: " << input_array_size << endl;
    cout << "hidden units: " << hidden_array_size << endl;
    cout << "output units: " << output_array_size << endl;
    cout << "number of input and target output patterns: " << number_of_input_patterns << endl << endl;
    cout << "INPUT AND TARGET OUTPUT PATTERNS:";
    for(x=0; x<number_of_input_patterns; x++) {
        cout << endl << "input pattern: " << (x+1) << endl;
        for(y=0; y<input_array_size; y++) cout << input[x][y] << "  ";
        cout << endl << "target output pattern: " << (x+1) << endl;
        for(y=0; y<output_array_size; y++) cout << target[x][y] << "  ";
    }
    cout << endl << endl << "BIASES:" << endl;
    // iterate far enough to print every bias, even when there are more
    // output units than hidden units (as in the 2-3-4 example file)
    int upper = (hidden_array_size > output_array_size) ? hidden_array_size : output_array_size;
    for(x=0; x<upper; x++) {
        if(x<hidden_array_size) cout << "bias of hidden unit " << (x+1) << ": " << bias[x];
        if(x<output_array_size) cout << "      bias of output unit " << (x+1) << ": " << bias[x+hidden_array_size];
        cout << endl;
    }
    cout << endl << "WEIGHTS:" << endl;
    for(x=0; x<input_array_size; x++) {
        for(y=0; y<hidden_array_size; y++) cout << "i_h[" << x << "][" << y << "]: " << weight_i_h[x][y] << endl;
    }
    for(x=0; x<hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) cout << "h_o[" << x << "][" << y << "]: " << weight_h_o[x][y] << endl;
    }
    return;
}

void print_data_to_file()
{
    char printfile[128];
    cout << endl << "enter name of file to print data to: ";
    cin >> printfile;
    ofstream out;
    out.open(printfile);
    if(!out) { cout << endl << "failed to open file"; return; }
    register int x, y;
    out << endl << endl << "DATA FILE: " << filename << endl;
    out << "input units: " << input_array_size << endl;
    out << "hidden units: " << hidden_array_size << endl;
    out << "output units: " << output_array_size << endl;
    out << "learning rate: " << learning_rate << endl;
    out << "number of input and target output patterns: " << number_of_input_patterns << endl << endl;
    out << "INPUT AND TARGET OUTPUT PATTERNS:";
    for(x=0; x<number_of_input_patterns; x++) {
        out << endl << "input pattern: " << (x+1) << endl;
        for(y=0; y<input_array_size; y++) out << input[x][y] << "  ";
        out << endl << "target output pattern: " << (x+1) << endl;
        for(y=0; y<output_array_size; y++) out << target[x][y] << "  ";
    }
    out << endl << endl << "BIASES:" << endl;
    // same as print_data_to_screen: cover every bias even when there
    // are more output units than hidden units
    int upper = (hidden_array_size > output_array_size) ? hidden_array_size : output_array_size;
    for(x=0; x<upper; x++) {
        if(x<hidden_array_size) out << "bias of hidden unit " << (x+1) << ": " << bias[x];
        if(x<output_array_size) out << "      bias of output unit " << (x+1) << ": " << bias[x+hidden_array_size];
        out << endl;
    }
    out << endl << "WEIGHTS:" << endl;
    for(x=0; x<input_array_size; x++) {
        for(y=0; y<hidden_array_size; y++) out << "i_h[" << x << "][" << y << "]: " << weight_i_h[x][y] << endl;
    }
    for(x=0; x<hidden_array_size; x++) {
        for(y=0; y<output_array_size; y++) out << "h_o[" << x << "][" << y << "]: " << weight_h_o[x][y] << endl;
    }
    out.close();
    cout << endl << "data has been printed to " << printfile << endl;
    return;
}

void change_learning_rate()
{
    if (file_loaded == 0)
    {
        cout << endl
            << "there is no data loaded into memory"
            << endl;
        return;
    }
    cout << endl << "actual learning rate: " << learning_rate << " new value: ";
    cin >> learning_rate;
    return;
}

void compute_output_pattern()
{
    if (file_loaded == 0)
    {
        cout << endl
            << "there is no data loaded into memory"
            << endl;
        return;
    }
    char choice;
    cout << endl << endl << "1. load trained input pattern into network" << endl;
    cout << "2. load custom input pattern into network" << endl;
    cout << "3. go back to main menu" << endl << endl;
    cout << "Enter your choice (1-3)" << endl;
    do { choice = getch(); } while (choice != '1' && choice != '2' && choice != '3');
    switch(choice) {
    case '1': test();
        break;
    case '2': custom();
        break;
    case '3': return;
    };
}

void custom()
{
    _control87 (MCW_EM, MCW_EM);   // mask floating-point exceptions (MSVC/Borland-specific)
    char filename[128];
    register double temp=0;
    register int x,y;
    double *custom_input = new double [input_array_size];
    if(!custom_input)
    {
        cout << endl << "memory problem!";
        return;
    }
    double *custom_output = new double [output_array_size];
    if(!custom_output)
    {
        delete [] custom_input;
        cout << endl << "memory problem!";
        return;
    }
    cout << endl << endl << "enter file that contains test input pattern: ";
    cin >> filename;
    ifstream in(filename);
    if(!in) {
        cout << endl << "failed to load data file" << endl;
        delete [] custom_input;   // free both buffers before bailing out
        delete [] custom_output;
        return;
    }
    for(x = 0; x < input_array_size; x++) {
        in >> custom_input[x];
    }
    for(y=0; y<hidden_array_size; y++) {
        for(x=0; x<input_array_size; x++) {
            temp += (custom_input[x] * weight_i_h[x][y]);
        }
        hidden[y] = (1.0 / (1.0 + exp(-1.0 * (temp + bias[y]))));
        temp = 0;
    }
    for(y=0; y<output_array_size; y++) {
        for(x=0; x<hidden_array_size; x++) {
            temp += (hidden[x] * weight_h_o[x][y]);
        }
        custom_output[y] = (1.0 / (1.0 + exp(-1.0 * (temp + bias[y + hidden_array_size]))));
        temp = 0;
    }
    cout << endl << "Input pattern:" << endl;
    for(x = 0; x < input_array_size; x++) {
        cout << "[" << (x + 1) << ": " << custom_input[x] << "]  ";
    }
    cout << endl << endl << "Output pattern:";
    for(x=0; x<output_array_size; x++) {
        cout << endl << (x+1) << ": " << custom_output[x] << "    binary: ";
        if(custom_output[x] >= 0.9) cout << "1";
        else if(custom_output[x]<=0.1) cout << "0";
        else cout << "intermediate value";
    }
    cout << endl;
    delete [] custom_input;
    delete [] custom_output;
    return;
}

void clear_memory()
{
    int x;
    for(x=0; x<number_of_input_patterns; x++)
    {
        delete [] input[x];
    }
    delete [] input;
    delete [] hidden;
    for(x=0; x<number_of_input_patterns; x++)
    {
        delete [] output[x];
    }
    delete [] output;
    for(x=0; x<number_of_input_patterns; x++)
    {
        delete [] target[x];
    }
    delete [] target;
    delete [] bias;
    for(x=0; x<input_array_size; x++)
    {
        delete [] weight_i_h[x];
    }
    delete [] weight_i_h;
    for(x=0; x<hidden_array_size; x++)
    {
        delete [] weight_h_o[x];
    }
    delete [] weight_h_o;
    delete [] errorsignal_hidden;
    delete [] errorsignal_output;
    file_loaded = 0;
    return;
}
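
A note before the data file: the listing targets old DOS/Windows compilers. getch() and kbhit() come from the non-standard <conio.h>, _control87() and MCW_EM from the Microsoft/Borland <float.h>, and the register keyword was removed in C++17 (compile with -std=c++14 or delete it). To try the program on another toolchain, one possible workaround (my assumption, not part of the original program) is to drop the #include <conio.h> and supply stand-ins such as:

#include <iostream>

// Hypothetical stand-ins for the DOS-era calls, assuming line-buffered
// (Enter-terminated) input is acceptable.
static int getch() { return std::cin.get(); }  // menu keys must be followed by Enter
static int kbhit() { return 0; }               // never reports a keypress, so learn()
                                               // runs until the error criterion is met
#ifndef MCW_EM
#define MCW_EM 0
static unsigned _control87(unsigned, unsigned) { return 0; }  // no-op outside MSVC
#endif

This keeps the rest of the listing unchanged; only the interactive feel differs.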

The data file used to initialize the network:

2
3
4
0.5
4
5.747781 -6.045236 1.206744 -41.245163 -0.249886 -0.35452 0.0718 

-8.446443 9.25553 -6.50087 7.357942 7.777944 1.238442 

15.957281 0.452741 -8.19198 9.140881 29.124746 9.806898 5.859479 -5.09182 -3.475694 -4.896269 6.320669 0.213897 

1 1
1 0
0 1
0 0 

1 1 0 1
0 1 1 0
0 1 1 1
0 0 0 1 

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!explanation of datafile. Can be deleted. Not necessary for network to work!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

2    (number of input units)
3    (number of hidden units)
4    (number of output units)
0.5  (learning rate)
4    (number of input and target output patterns)     (has to correspond to the number of patterns at the end of the datafile)
5.747781 -6.045236 1.206744 -41.245163 -0.249886 -0.35452 0.0718   (biases of hidden and output units, first three are biases of the hidden units, last four are biases of the output units)

-8.446443 9.25553 -6.50087 7.357942 7.777944 1.238442 (values of weights from input to hidden units)

15.957281 0.452741 -8.19198 9.140881 29.124746 9.806898 5.859479 -5.09182 -3.475694 -4.896269 6.320669 0.213897 (values of weights from hidden to output units)

1 1 (input pattern #1)
1 0 (input pattern #2)
0 1 (input pattern #3)
0 0 (input pattern #4)

1 1 0 1 (target output pattern #1)
0 1 1 0 (target output pattern #2)
0 1 1 1 (target output pattern #3)
0 0 0 1 (target output pattern #4) 

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!                      end of explanation of datafile.                     !!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
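
As a quick consistency check on the format: for this 2-3-4 network the bias line must hold 3 + 4 = 7 values, the input-to-hidden block 2 × 3 = 6 weights, and the hidden-to-output block 3 × 4 = 12 weights, followed by 4 input patterns of 2 values each and 4 target patterns of 4 values each, which is exactly what the file above contains.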

Following the data-file format described above, you can save the input pattern [0, 1] in a file b.txt and feed it to the network (main-menu option 3, then sub-option 2, "load custom input pattern"); the corresponding output is shown below:
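
The screenshot of the run is not reproduced here. For reference, b.txt simply contains the two values of the input pattern:

0 1

and the custom() routine then prints each of the four output activations followed by a binary reading (1 if the activation is at least 0.9, 0 if it is at most 0.1, "intermediate value" otherwise); for a successfully trained network the binary column reads 0 1 1 0.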

As you can see, the input [0, 1] yields the result 0110, consistent with the target the network was trained on.

Finally, this code has not been tested in depth and supports only a single hidden layer, so it is best used as an aid for working through the principles of the algorithm.
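
Since note 3 above covers gradient checking, it is worth closing with that habit: before trusting any backpropagation code, compare its analytic gradient against a centered finite difference. Below is a minimal, self-contained C++ sketch of the idea; it is independent of the program above, and the tiny one-weight "network" and all names in it are hypothetical, chosen only to keep the check readable:

#include <cmath>
#include <cstdio>

// Hypothetical toy model: one sigmoid unit with a single weight w and
// squared-error loss on one training pair (x, t):
//   J(w) = 0.5 * (sigmoid(w*x) - t)^2
static double sigmoid(double z) { return 1.0 / (1.0 + exp(-z)); }

static double loss(double w, double x, double t) {
    double o = sigmoid(w * x);
    return 0.5 * (o - t) * (o - t);
}

// Analytic gradient: dJ/dw = (o - t) * o * (1 - o) * x
static double analytic_grad(double w, double x, double t) {
    double o = sigmoid(w * x);
    return (o - t) * o * (1.0 - o) * x;
}

int main() {
    double w = 0.7, x = 2.0, t = 1.0, eps = 1e-4;
    // centered difference (J(w+eps) - J(w-eps)) / (2*eps), as in the notes
    double numeric = (loss(w + eps, x, t) - loss(w - eps, x, t)) / (2.0 * eps);
    double analytic = analytic_grad(w, x, t);
    printf("numeric  = %.10f\nanalytic = %.10f\ndiff     = %.2e\n",
           numeric, analytic, fabs(numeric - analytic));
    return 0;
}

If the two numbers agree to several significant digits, the analytic gradient is almost certainly right. Note that applying such a check to the listing above would flag its two quirks, since its updates are not the exact gradient of the squared error.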
