00001
00002 #ifndef INCLUDED_karmedbandit_h_
00003 #define INCLUDED_karmedbandit_h_
00004
#include <stdlib.h>

#include <cmath>
#include <iostream>
#include <vector>
00008
00009
00010
00011
00012
00013
00014
/// Exponential-weight chooser over k arms (the class name suggests the Exp3
/// algorithm for the adversarial k-armed bandit problem).
///
/// Every arm i carries a weight w[i], initially 1.  decide() draws an arm from
///     p[i] = (1-g) * w[i] / sum(w) + g / k
/// and reward(true) multiplies the weight of the most recently drawn arm by
/// exp(g / (p * k)), where p is the probability that arm was drawn with.
///
/// decide() and reward() both write a trace to std::cout.
class karmedbanditExp3 {
public:
    /// @param k       number of arms
    /// @param gammap  initial value of g, the share of probability mass that
    ///                is spread uniformly over the arms
    karmedbanditExp3(unsigned int k, double gammap)
        : w(k, 1), lastp(0), last(-1U), g(gammap)
    {}

    /// Draws the next arm using rand().
    ///
    /// Prints the current weights ("w = ...") and the resulting probabilities
    /// ("p = ...") to std::cout, then remembers the chosen arm and its
    /// probability for the following reward() call.
    ///
    /// @return the chosen arm in [0, k-1], or -1U when there are no arms.
    unsigned int decide() {
        const unsigned int k = getK();
        std::vector<double> p(k);

        std::cout << "w =";
        double wsum = 0;
        for (unsigned int i = 0; i < k; i++) {
            std::cout << ' ' << w[i];
            wsum += w[i];
        }
        std::cout << std::endl;

        std::cout << "p =";
        double psum = 0;
        for (unsigned int i = 0; i < k; i++) {
            p[i] = (1 - g) * w[i] / wsum + g / k;
            std::cout << ' ' << p[i];
            psum += p[i];
        }
        std::cout << std::endl;

        // Scale the uniform draw by psum so that accumulated rounding in the
        // probabilities does not bias the selection.
        double pick = (rand() / static_cast<double>(RAND_MAX)) * psum;
        for (unsigned int i = 0; i < k; i++) {
            pick -= p[i];
            if (pick <= 0) {
                lastp = p[i];
                return last = i;
            }
        }

        if (k == 0)
            return -1U;

        // Rounding can leave a sliver of 'pick' after the last subtraction;
        // that sliver belongs to the final arm, so select it instead of
        // reporting "no arm".
        lastp = p[k - 1];
        return last = k - 1;
    }

    /// Reports the outcome of the arm returned by the latest decide().
    ///
    /// On a reward the arm's weight is multiplied by exp(g / (lastp * k)).
    /// The weight update is skipped when no arm has been chosen yet or when
    /// the stored probability is not positive; the trace line is printed
    /// either way.
    ///
    /// @param r  true if the last decision was rewarded
    void reward(bool r) {
        if (r) {
            if (last < w.size() && lastp > 0) {
                w[last] *= std::exp(g / lastp / w.size());
                // Only the ratios between weights enter decide(), so dividing
                // every weight by the same factor leaves the probabilities
                // unchanged while keeping the weights far from overflow.
                if (w[last] > kRescaleThreshold()) {
                    const double scale = w[last];
                    for (unsigned int i = 0; i < w.size(); i++)
                        w[i] /= scale;
                }
            }
            std::cout << "REWARD! :)" << std::endl;
        } else
            std::cout << "no reward. :(" << std::endl;
    }

    /// Sets every weight back to 1; g and the remembered last choice are kept.
    void reset() {
        for (unsigned int i = 0; i < w.size(); i++)
            w[i] = 1;
    }

    /// @return the current value of g
    double getGamma() const { return g; }

    /// Replaces g; takes effect from the next decide()/reward() call.
    void setGamma(double gammap) { g = gammap; }

    /// @return the number of arms
    unsigned int getK() const { return static_cast<unsigned int>(w.size()); }

protected:
    /// Weight magnitude above which reward() renormalises all weights.
    static double kRescaleThreshold() { return 1e150; }

    std::vector<double> w; ///< one weight per arm
    double lastp;          ///< probability the last chosen arm was drawn with
    unsigned int last;     ///< last chosen arm, -1U before the first decide()
    double g;              ///< uniform-exploration share used by decide()/reward()
};
00076
00077
00078
00079
00080
00081
00082
00083 class karmedbanditExp3_1 {
00084 public:
00085
00086 karmedbanditExp3_1(unsigned int k)
00087 : r(0), gr(0), last(0), G(k,0), exp3(k,0)
00088 {
00089 restart();
00090 }
00091
00092
00093 unsigned int decide() {
00094 double maxG=G[0];
00095 for(unsigned int i=1;i<G.size();i++)
00096 if(G[i]>maxG)
00097 maxG=G[i];
00098 if(maxG>gr-exp3.getK()/exp3.getGamma()) {
00099 restart();
00100 return last=decide();
00101 }
00102 return last=exp3.decide();
00103 }
00104
00105 void reward(bool rew) {
00106 if(rew)
00107 G[last]+=1;
00108 exp3.reward(rew);
00109 }
00110 protected:
00111
00112 void restart() {
00113 std::cout << "Exp3 restart, g=" << std::flush;
00114 unsigned int k=exp3.getK();
00115 gr=(k*log(k))/(M_E-1)*pow(4,r);
00116 double gammap=sqrt(k*log(k)/(M_E-1)/gr);
00117
00118 exp3.setGamma(gammap<1?gammap:1);
00119 std::cout << (gammap<1?gammap:1) << std::endl;
00120 r++;
00121 }
00122 unsigned int r;
00123 double gr;
00124 unsigned int last;
00125 std::vector<double> G;
00126 karmedbanditExp3 exp3;
00127 };
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140 #endif