Homepage Demos Overview Downloads Tutorials Reference
Credits
Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Namespace Members | Compound Members | File Members | Related Pages | Search

karmedbandit.h

Go to the documentation of this file.
00001 //-*-c++-*-
00002 #ifndef INCLUDED_karmedbandit_h_
00003 #define INCLUDED_karmedbandit_h_
00004 
#include <stdlib.h>

#include <cmath>
#include <iostream>
#include <vector>
00008 
//!Makes decisions regarding an adversarial k-armed bandit
/*! Uses the Exp3 algorithm described in:
 *  The non-stochastic multi-armed bandit problem
 *  Auer, Cesa-Bianchi, Freund, and Schapire
 *  October 14, 2002
 *
 *  Every arm carries a weight (initially 1).  decide() draws an arm with
 *  probability (1-gamma)*w[i]/sum(w) + gamma/k, and reward(true) multiplies
 *  the weight of the arm just drawn by exp(gamma/(p*k)), where p is the
 *  probability that arm was drawn with.
 *
 *  decide() and reward() both write a trace to std::cout.
 */
class karmedbanditExp3 {
 public:
  //!constructor, pass the number of arms (@a k) and gamma (@a gammap), the share of probability spread uniformly over all arms by decide()
  karmedbanditExp3(unsigned int k,double gammap)
    : w(k,1.0),lastp(0),last(-1U),g(gammap)
  {}

  //!returns the next choice, [0:k-1] (or -1U if there are no arms to choose from)
  unsigned int decide() {
    const unsigned int k=getK();
    std::vector<double> p(k);

    double wsum=0;
    std::cout << "w =";
    for(unsigned int i=0; i<k; i++) {
      std::cout << ' ' << w[i];
      wsum+=w[i];
    }
    std::cout << std::endl;

    double psum=0;
    std::cout << "p =";
    for(unsigned int i=0; i<k; i++) {
      p[i]=(1-g)*w[i]/wsum+g/k;
      psum+=p[i];
      std::cout << ' ' << p[i];
    }
    std::cout << std::endl;

    if(k==0)
      return -1U;

    // Walk the cumulative distribution until the random draw is used up.
    // Rounding can leave 'pick' marginally above zero after every arm has
    // been subtracted, so default to the final arm instead of reporting
    // "no choice" and leaving last/lastp stale.
    double pick=(rand()/static_cast<double>(RAND_MAX))*psum;
    unsigned int choice=k-1;
    for(unsigned int i=0; i<k; i++) {
      pick-=p[i];
      if(pick<=0) {
        choice=i;
        break;
      }
    }
    lastp=p[choice];
    return last=choice;
  }
  //!call this if you want to reward (r==true) or penalize (r==false) the previous decision
  /*! A reward given before any successful decide() only prints the message;
   *  there is no previous decision whose weight could be raised. */
  void reward(bool r) {
    if(r) {
      if(last<w.size() && lastp>0) {
        w[last]*=std::exp(g/lastp/w.size());
        // decide() only uses the weights relative to their sum, so they can
        // be rescaled freely.  Do so before repeated rewards overflow a
        // weight to infinity, which would turn every probability into NaN.
        const double maxWeight=1e100;
        if(w[last]>maxWeight) {
          const double scale=w[last];
          for(unsigned int i=0; i<w.size(); i++)
            w[i]/=scale;
          w[last]=1; // also covers scale==inf, where inf/inf would be NaN
        }
      }
      std::cout << "REWARD! :)" << std::endl;
    } else
      std::cout << "no reward. :(" << std::endl;
  }
  //!resets weights
  void reset() {
    for(unsigned int i=0; i<w.size(); i++)
      w[i]=1;
  }
  //!gets gamma parameter
  double getGamma() const { return g; }
  //!sets gamma parameter
  void setGamma(double gammap) { g=gammap; }
  //!gets k parameter
  unsigned int getK() const { return static_cast<unsigned int>(w.size()); }
 protected:
  std::vector<double> w; //!< the weights
  double lastp; //!< prob of last choice
  unsigned int last; //!< the last choice (-1U until decide() has picked an arm)
  double g; //!< gamma
};
00076 
00077 //!Makes decisions regarding an adversarial k-armed bandit
00078 /*! Uses algorithms described in:
00079  *  The non-stochastic multi-armed bandit problem
00080  *  Auer, Cesa-Bianchi, Freund, and Schapire
00081  *  October 14, 2002
00082  */
00083 class karmedbanditExp3_1 {
00084  public:
00085   //!constructor, pass the number of arms
00086   karmedbanditExp3_1(unsigned int k)
00087     : r(0), gr(0), last(0), G(k,0), exp3(k,0)
00088   {
00089     restart();
00090   }
00091 
00092   //!returns the next choice, [0:k-1]
00093   unsigned int decide() {
00094     double maxG=G[0];
00095     for(unsigned int i=1;i<G.size();i++)
00096       if(G[i]>maxG)
00097         maxG=G[i];
00098     if(maxG>gr-exp3.getK()/exp3.getGamma()) {
00099       restart();
00100       return last=decide();
00101     }
00102     return last=exp3.decide();
00103   }
00104   //!call this if you want to reward (r==true) or penalize (r==false) the previous decision
00105   void reward(bool rew) {
00106     if(rew)
00107       G[last]+=1;
00108     exp3.reward(rew);
00109   }
00110  protected:
00111   //!restarts exp3
00112   void restart() {
00113     std::cout << "Exp3 restart, g=" << std::flush;
00114     unsigned int k=exp3.getK();
00115     gr=(k*log(k))/(M_E-1)*pow(4,r);
00116     double gammap=sqrt(k*log(k)/(M_E-1)/gr);
00117     //    exp3.reset(); //not sure if we're supposed to do this
00118     exp3.setGamma(gammap<1?gammap:1);
00119     std::cout << (gammap<1?gammap:1) << std::endl;
00120     r++;
00121   }
00122   unsigned int r; //!< the number of restarts
00123   double gr; //!< the gamma_r parameter
00124   unsigned int last; //!< the last choice
00125   std::vector<double> G; //!< the G-hat's
00126   karmedbanditExp3 exp3; //!< runs exp3 within this
00127 };
00128 
00129 /*! @file
00130  * @brief Defines karmedbandit - implements an algorithm which makes decisions regarding an adversarial k-armed bandit
00131  * @author ejt (Creator)
00132  *
00133  * $Author: ejt $
00134  * $Name: tekkotsu-1_4_1 $
00135  * $Revision: 1.2 $
00136  * $State: Exp $
00137  * $Date: 2003/03/03 01:18:12 $
00138  */
00139 
00140 #endif

Tekkotsu v1.4
Generated Sat Jul 19 00:06:30 2003 by Doxygen 1.3.2