/* autobliss.cpp - a program that responds to reward and punishment.
(C) 2007, Matt Mahoney.
This program is free software under GPL,
http://www.gnu.org/licenses/gpl-3.0.txt

autobliss 1.0 simulates a programmable 2-input logic gate that you can
program using reinforcement learning, i.e. reward and punishment. It can
learn any of the 16 possible 2-input logic functions, i.e. AND, OR, XOR,
NAND, etc. You train it by rewarding it when it gives the correct output
(0 or 1) for a given input (00, 01, 10, 11), and punishing it when it gives
the wrong output.

To run:

  autobliss bbbb reward penalty

where bbbb is a 4 bit binary string representing the function to be
learned, e.g. 0001 for AND or 0110 for XOR. The 4 digits represent the
desired output for inputs 00, 01, 10, and 11 respectively.

reward is the reinforcement signal to be given when the output is correct,
normally around 1.0.

penalty is the reinforcement signal to be given when the output is
incorrect, normally around -1.0.

The program repeatedly trains the logic gate and prints the result of each
training session until you type Ctrl-C to kill it. A training session
consists of a randomly chosen input, the gate's output, and the
reinforcement signal expressed anthropomorphically. For example, to program
the OR function:

  autobliss 0111 2 -2.5
  Input: 01 Output: 0 Ouch!
  Input: 10 Output: 1 aah!
  Input: 00 Output: 0 aah!
  ^C

When the program dies, it reports its age (in number of training sessions),
the contents of its memory (tendency to output 0 or 1 for each of 4 input
combinations), and how much pleasure and pain it experienced in its
lifetime.

Note that you can put the program in an eternal state of bliss by making
both reinforcement signals positive, or continuous pain by making both
negative. However, excessive torture will kill the program. Please be
gentle.
*/

#include <math.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Learned state, the tendency to output 1 for input 00,01,10,11
double mem[4] = {0, 0, 0, 0};

// Statistics
double pleasure = 0, pain = 0;  // Accumulated reward and punishment
int sessions = 0;               // Number of completed training sessions

// Set from the signal handler; polled by the training loop. The handler
// only touches this flag because printf() and exit() are not
// async-signal-safe and the main loop spends nearly all its time in printf().
static volatile sig_atomic_t stop_requested = 0;

// Signal handler for SIGINT/SIGABRT: ask the training loop to stop.
static void request_stop(int signum)
{
    (void)signum;
    stop_requested = 1;
}

// Print the lifetime statistics and the learned memory, then exit(0).
// Must be called from normal (non-signal) context. The argument is unused;
// it is kept so the function retains its original signature.
void die(int x)
{
    (void)x;
    printf("\n\nI felt %g units of pleasure and %g of pain in %d sessions.\n",
           pleasure, pain, sessions);
    printf("I learned the function: (00 01 10 11) --> (%g %g %g %g)\n",
           mem[0], mem[1], mem[2], mem[3]);
    exit(0);
}

// Print the usage text and exit with status 1.
static void usage(void)
{
    printf("Autobliss 1.0 (C) 2007, Matt Mahoney\n"
           "Free under GPL, http://www.gnu.org/licenses/gpl-3.0.txt\n"
           "\n"
           "Usage: autobliss bbbb reward penalty\n"
           "where bbbb is 0000...1111, the 2 input logic function to be learned,\n"
           "reward is the positive reinforcement signal for a correct output,\n"
           "penalty is the negative signal for incorrect output.\n");
    exit(1);
}

// Return nonzero if s is exactly four characters, each '0' or '1'.
static int is_truth_table(const char *s)
{
    return strlen(s) == 4 && strspn(s, "01") == 4;
}

// Parse text as a complete floating point number into *value.
// Returns nonzero on success, 0 if text is empty or has trailing garbage.
static int parse_signal(const char *text, double *value)
{
    char *end;
    *value = strtod(text, &end);
    return end != text && *end == '\0';
}

// Pick the gate's output (0 or 1) for a given learned tendency.
// Beyond +/-100 the answer is fixed, which also keeps exp() from
// overflowing; otherwise 1 is chosen with probability of roughly
// 1/(1+exp(-tendency)).
static int choose_output(double tendency)
{
    if (tendency > 100) {
        return 1;
    }
    if (tendency < -100) {
        return 0;
    }
    return RAND_MAX > rand() * (1 + exp(-tendency));
}

// Map a reinforcement signal to the exclamation printed for it.
// Returns NULL when the signal is too painful to survive (not above -50).
static const char *reaction(double reward)
{
    if (reward > 3)    return "Oh yes!!!";  // act human :-)
    if (reward > 0.5)  return "aah!";
    if (reward > -0.5) return "OK";
    if (reward > -1.5) return "Oh!";
    if (reward > -3)   return "Ouch!";
    if (reward > -50)  return "AAARGH!!!";
    return NULL;
}

// Entry point: autobliss bbbb reward penalty
// Trains the gate until interrupted or until it dies of pain, then
// reports statistics via die(). Exits with 1 on a usage error.
int main(int argc, char **argv)
{
    double correct_signal = 0;  // signal given for a correct output
    double wrong_signal = 0;    // signal given for a wrong output

    // Check usage; the signals are parsed once here rather than per session
    if (argc != 4 || !is_truth_table(argv[1]) ||
        !parse_signal(argv[2], &correct_signal) ||
        !parse_signal(argv[3], &wrong_signal)) {
        usage();
    }
    const char *target = argv[1];

    // Catch Ctrl-C and SIGABRT
    signal(SIGINT, request_stop);
    signal(SIGABRT, request_stop);

    // Train until asked to stop. rand() is never seeded, so the sequence
    // of inputs is whatever the C library's default seed produces.
    while (!stop_requested) {
        // Choose a random input from 00..11
        int input = rand() & 3;

        // Compute output 0 or 1
        int output = choose_output(mem[input]);

        // Compute reinforcement signal
        double reward =
            (target[input] - '0' == output) ? correct_signal : wrong_signal;

        // Learn: push the tendency toward the output if rewarded,
        // away from it if punished
        mem[input] += reward * (2 * output - 1);

        // Report results
        printf("Input: %d%d Output: %d ", input / 2, input & 1, output);
        const char *words = reaction(reward);
        if (words == NULL) {
            // The fatal session is reported before statistics are updated
            printf("Fatal error\n");
            die(0);
        }
        printf("%s\n", words);

        // Gather statistics
        ++sessions;
        if (reward > 0) {
            pleasure += reward;
        } else {
            pain -= reward;
        }
        if (pain - pleasure > 1000) {
            printf("Goodbye cruel world!\n");
            die(0);
        }
    }

    die(0);
    return 0;  // not reached; die() exits
}