FilterBankGenerator.h

Go to the documentation of this file.
00001 //-*-c++-*-
00002 #ifndef INCLUDED_FilterBankGenerator_h_
00003 #define INCLUDED_FilterBankGenerator_h_
00004 
00005 #include "Events/EventGeneratorBase.h"
00006 #include "Shared/LoadSave.h"
00007 
00008 //! Abstract base class for generators of FilterBankEvent's
00009 /*! This is needed to provide an interface for the FilterBankEvent to
00010  *  call back when the actual image data is requested from it.  This
00011  *  facilitates lazy calculation of image data...  no sense in
00012  *  processing layers or channels which aren't actually going to be
00013  *  used...
00014  *
00015  *  Also this way we save on allocating/deallocating large memory
00016  *  blocks on each event... the buffers allocated here can be reused
00017  *  frame to frame.
00018  *
00019  *  Larger layer indices generally indicate higher resolution images
00020  *  in a scaling pyramid, but you are free to store your own data
00021  *  however you wish.
00022  *
00023  *  <h3>Serialization Format</h3>
00024  *
00025  *  First, be sure to get a good overview of the LoadSave style.  Most
00026  *  serialization is handled using this interface.
00027  *
00028  *  When, for instance, RawCameraGenerator::saveBuffer() is called, it
00029  *  first calls its superclass, FilterBankGenerator::saveBuffer(),
00030  *  which will write out the general image information, common to all
00031  *  subclasses of FilterBankGenerator. (I'll cover the specifics in a
00032  *  second) Once that's done, the RawCameraGenerator adds its own bit
00033  *  of header and then saves the image data itself.
00034  *
00035  *  Note that only a single channel is being saved at this point.  So
00036  *  for instance, all the Y information.  No interleaving is going
00037  *  on. (unless you're saving from InterleavedYUVGenerator of course,
00038  *  which treats the 3 interleaved channels as a single image)
00039  *  Otherwise, only one image (selected with selectSaveImage()) of the
00040  *  bank will be loaded or saved at a time.
00041  *  
00042  *  So, anyway.  The first header will be the same for all
00043  *  FilterBankGenerator subclasses.  In the specification below, I'm
00044  *  going to use one field per line (the new lines are not literal,
00045  *  it's a binary stream).  Each field is of the form '<@c type:name>
00046  *  <i>(notes)</i>'
00047  *  
00048  *  FilterBankGenerator Header: (from FilterBankGenerator::saveBuffer())
00049  *  - <@c string: "FbkImage">  <i>(remember a 'string' is len+str+0; so this is the literal "\010\0\0\0FbkImage\0"; also remember "\010" is octal for 8)</i>
00050  *  - <@c unsigned @c int: width> 
00051  *  - <@c unsigned @c int: height> 
00052  *  - <@c unsigned @c int: image layer> 
00053  *  - <@c unsigned @c int: image channel> <i>(so notice you can tell which channel it is after it's been saved)</i>
00054  * 
00055  *  Generator Specific Header (selected examples follow, or similarly, any of the other generators)
00056  *  
00057  *  - RawCameraGenerator: (from RawCameraGenerator::saveBuffer())
00058  *    - <@c string: "RawImage">
00059  *    - <<tt>char[</tt>width<tt>*</tt>height<tt>]</tt>: image data> <i>(note, just one channel being stored)</i>
00060  *  - InterleavedYUVGenerator: (from InterleavedYUVGenerator::saveBuffer())
00061  *    - <@c string: "InterleavedYUVImage">
00062  *    - <<tt>char[</tt>width<tt>*</tt>height<tt>*3]</tt>: image data> <i>(in YVU order, technically YCbCr)</i>
00063  *  - SegmentedColorGenerator: (from SegmentedColorGenerator::saveBuffer())
00064  *    - <@c string: "SegColorImage">
00065  *    - <<tt>char[</tt>width<tt>*</tt>height<tt>]</tt>: image data> <i>(one byte per sample)</i>
00066  *    - <@c unsigned @c int: num_cols> <i>(number of different colors available)</i>
00067  *    - for each of num_cols:
00068  *      - <@c char: red> <i>red color to use for display of this index</i>
00069  *      - <@c char: green> <i>green color to use for display of this index</i>
00070  *      - <@c char: blue> <i>blue color to use for display of this index</i>
00071  *  - RLEGenerator: (from RLEGenerator::saveBuffer())
00072  *    - <@c string: "RLEImage">  <i>(remember a 'string' is len+str+0; so this is the literal "\010\0\0\0RLEImage\0"; also remember "\010" is octal for 8)</i>
00073  *    - <@c unsigned @c int: num_runs> <i>(how many runs will follow)</i>
00074  *    - for each of num_runs:
00075  *      - <@c char: color> <i>(index value of color of run)</i>
00076  *      - <@c short: x> <i>(x position of start of run ("unknown" runs are skipped - assume index 0 for pixels which are jumped))</i>
00077  *      - <@c short: width> <i>(length of run, will not exceed remaining width of image)</i>
00078  *    - <i>(notice there's no color information from RLE - it's not (shouldn't be) assuming anything about the data being compressed)</i>
00079  *
00080  *  However, while we're on the topic, I'll mention that although this
00081  *  is the same image format used for streaming to VisionGUI, there are
00082  *  a few more fields added by RawCam behavior or SegCam behavior at the
00083  *  beginning of each packet.  See those classes for more information
00084  *  on the wireless protocol. That should tell you everything you need
00085  *  to know to interpret the vision stream as well.
00086  *
00087  *  <h3>Adding New FilterBankGenerator Subclasses</h3>
00088  *
00089  *  If you're doing fancy memory stuff, you probably want to override
00090  *  the freeCaches() and destruct() functions so that the default
00091  *  implementation won't try to free something it shouldn't.  Don't
00092  *  forget to call them from your own destructor though, otherwise
00093  *  your versions won't get called before the default implementation's
00094  *  does.
00095  *
00096  *  If you want to be able to transmit or save your images, you will
00097  *  need to override the LoadSave functions (listed below) to provide
00098  *  your own code for interpreting the image data itself, and then
00099  *  create or modify a behavior to open a socket and transmit the
00100  *  information.  (you could do that from within the generator itself
00101  *  if you like)
00102  *
00103  *  You will probably also want to add a few extra functions to allow
00104  *  users to set compression/data format parameters.
00105  *
00106  *  @see RawCameraGenerator, SegmentedColorGenerator for the basic
00107  *  image access
00108  * 
00109  *  @see RLEGenerator, RegionGenerator for some relatively simple
00110  *  examples of vision stages if you want to make some of your own.
00111  */
00112 class FilterBankGenerator : public EventGeneratorBase, public LoadSave {
00113 public:
00114   // Constructors are all protected - doesn't make sense to
00115   // instantiate this class directly, you want to use a subclass
00116 
00117   //! destructor
00118   /*! Your own subclasses should also have destructors which call
00119    *  freeCaches() and destruct().  Otherwise, if you override these
00120    *  functions to delete any custom memory you allocate, those
00121    *  implementations won't be called by this destructor... a
00122    *  destructor ignores virtual functions, only calls at its own
00123    *  class level.\n
00124    *  So it really doesn't matter if you aren't allocating any extra
00125    *  memory other than what's in the image cache, but it's still good
00126    *  form just in case you add stuff later so you won't forget and
00127    *  leak memory everywhere */
00128   virtual ~FilterBankGenerator() {
00129     freeCaches();
00130     destruct();
00131   }
00132 
00133   //! returns the generator this is receiving its events from (or the last one anyway)
00134   virtual const FilterBankGenerator * getSourceGenerator() const { return src; }
00135 
00136   //! returns the number of image layers (e.g. different resolutions available)
00137   virtual unsigned int getNumLayers() const { return numLayers; }
00138 
00139   //! returns the number of channels per image (e.g. Y, U, or V components)
00140   virtual unsigned int getNumChannels() const { return numChannels; }
00141   
00142   //! returns pointer to the beginning of the image data for the specified layer and channel
00143   /*! this will cause the data to be calculated and cached if it's not already available */
00144   virtual unsigned char * getImage(unsigned int layer, unsigned int channel);
00145 
00146   //! returns the number of bytes used for the data returned by getImage() - if the data varies in size (e.g. jpeg compression), will return 0 if the image hasn't been calculated yet (so call it @e after getImage()); this default implementation simply returns width*height of the layer and ignores the channel
00147   virtual size_t getImageSize(unsigned int layer, unsigned int /*chan*/) const { return widths[layer]*heights[layer]; }
00148   
00149   //! returns whether or not an image has already been calculated for the current frame
00150   /*! If you call this immediately after getImage() and this still returns false, 
00151    *  then an error must have occurred during processing */
00152   virtual bool getImageCached(unsigned int layer, unsigned int channel) const { return imageValids[layer][channel]; }
00153 
00154   //! returns width (in samples) of the image in a given layer
00155   unsigned int getWidth(unsigned int layer) const { return widths[layer]; }
00156 
00157   //! returns height (in samples) of the image in a given layer
00158   unsigned int getHeight(unsigned int layer) const { return heights[layer]; }
00159   
00160   //! returns the bytes to skip from the one-past-end of a row to get the beginning of the next
00161   unsigned int getSkip(unsigned int layer) const { return skips[layer]; }
00162   
00163   //! returns the bytes to skip from the beginning of one row to get the beginning of the next
00164   /*! This is just for convenience; the stride is just the skip plus the width, but it's precomputed for you for speed and clarity */
00165   unsigned int getStride(unsigned int layer) const { return strides[layer]; }
00166 
00167   //! returns the increment (in bytes) to use to go from one sample to the next
00168   unsigned int getIncrement(unsigned int layer) const { return increments[layer]; }
00169   
00170   //! returns the frame number of the current frame, see #frameNumber
00171   unsigned int getFrameNumber() const { return frameNumber; }
00172   
00173   //! returns the number of frames processed, see #framesProcessed
00174   unsigned int getFramesProcessed() const { return framesProcessed; }
00175   
00176   //! returns a pointer to a particular sample; if you are using this in an inner loop, consider using the getSkip() and getIncrement() values to iterate with better performance
00177   /*! @param px      the horizontal pixel position, relative to left edge; no boundary checking is done, ranges 0 through width-1
00178    *  @param py      the vertical pixel position, relative to top edge; no boundary checking is done, ranges 0 through height-1
00179    *  @param layer   the resolution layer to extract from
00180    *  @param channel the image channel to extract from */
00181   unsigned char * getPixel(unsigned int px, unsigned int py, unsigned int layer, unsigned int channel) { return getImage(layer,channel)+py*getStride(layer)+px*getIncrement(layer); }
00182   
00183   //! returns a pointer to a particular sample; if you are using this in an inner loop, consider using the getSkip() and getIncrement() values to iterate with better performance
00184   /*! @param x       the horizontal position, relative to center of the image, left edge is -1 and right edge is 1; no boundary checking is done
00185    *  @param y       the vertical pixel position, relative to center of the image, top edge is the negative aspect ratio, bottom edge is positive aspect ratio; no boundary checking is done
00186    *  @param layer   the resolution layer to extract from
00187    *  @param channel the image channel to extract from
00188    *
00189    *  To keep the coordinate system square, the x is defined to range -1,1, but y's range depends on the
00190    *  aspect ratio of the image, height/width.  Thus typically y will approx. -.75,.75 */
00191   unsigned char * getPixel(float x, float y, unsigned int layer, unsigned int channel) {
00192     unsigned int px,py;
00193     getPixelCoordinates(px,py,x,y,layer);
00194     return getPixel(px,py,layer,channel);
00195   }
00196 
00197   //! sets the pixel-coordinate px and py parameters to the corresponding value of x and y
00198   /*! @param[out] px      the pixel position, relative to left edge, positive right, ranges 0 through width-1
00199    *  @param[out] py      the pixel position, relative to top edge, positive down, ranges 0 through height-1
00200    *  @param[in]  x       the horizontal position, relative to center of the image, left edge is -1 and right edge is 1; no boundary checking is done
00201    *  @param[in]  y       the vertical pixel position, relative to center of the image, top edge is the negative aspect ratio, bottom edge is positive aspect ratio; no boundary checking is done
00202    *  @param[in]  layer   the resolution layer the pixel coordinates are relative to
00203    *
00204    *  To keep the coordinate system square, the x is defined to range -1,1, but y's range depends on the
00205    *  aspect ratio of the image, height/width.  Thus typically y will approx. -.75,.75 */
00206   void getPixelCoordinates(unsigned int& px, unsigned int& py, float x, float y, unsigned int layer) const {
00207     //note width sets the scale for both, so coordinate system is square... is good? I'm up for debate.
00208     px=(unsigned int)((getWidth(layer)-1)*(x+1)/2+.5f); //+.5 to round to nearest
00209     float aspect=getHeight(layer)/(float)getWidth(layer); //height/width, i.e. the magnitude of y at the top and bottom edges
00210     py=(unsigned int)((getHeight(layer)-1)*(y+aspect)/(aspect*2)+.5f); //maps -aspect..aspect onto 0..height-1, +.5 to round to nearest
00211   }
00212   
00213   //! sets the x and y parameters from the pixel-coordinates px and py
00214   /*! @param[out] x       the horizontal position, relative to center of the image, left edge is -1 and right edge is 1; no boundary checking is done
00215    *  @param[out] y       the vertical pixel position, relative to center of the image, top edge is the negative aspect ratio, bottom edge is positive aspect ratio; no boundary checking is done
00216    *  @param[in]  px      the pixel position, relative to left edge, positive right, ranges 0 through width-1
00217    *  @param[in]  py      the pixel position, relative to top edge, positive down, ranges 0 through height-1
00218    *  @param[in]  layer   the resolution layer the pixel coordinates are relative to
00219    *
00220    *  To keep the coordinate system square, the x is defined to range -1,1, but y's range depends on the
00221    *  aspect ratio of the image, height/width.  Thus typically y will approx. -.75,.75 */
00222   void getRealCoordinates(float& x, float& y, unsigned int px, unsigned int py, unsigned int layer) const {
00223     //note width sets the scale for both, so coordinate system is square... is good? I'm up for debate.
00224     x=px/(float)(getWidth(layer)-1)*2-1; //NOTE(review): divides by zero when the layer's width is 1
00225     float aspect=getHeight(layer)/(float)getWidth(layer); //height/width, i.e. the magnitude of y at the top and bottom edges
00226     y=py/(float)(getHeight(layer)-1)*aspect*2-aspect; //NOTE(review): divides by zero when the layer's height is 1
00227   }
00228   
00229   //! deletes storage of cached images and marks it invalid
00230   /*! you should override this if the images cache pointer isn't actually an array of bytes... 
00231    *  Don't forget to call it in your subclass's destructor or your version won't get called... */
00232   virtual void freeCaches();
00233 
00234   //! marks all of the cached images as invalid (but doesn't free their memory)
00235   /*! You probably want to call this right before you send the FilterBankEvent */
00236   virtual void invalidateCaches();
00237 
00238   //! default implementation does a few common housekeeping chores for you - probably should just take a look at its code
00239   /*! It doesn't throw any events for you - that's probably the main
00240    *  reason you'd still want to override it\n
00241    *  Also, if your class has a set number of layers or channels - for
00242    *  instance, always 1 channel like InterleavedYUVGenerator, you
00243    *  should override setNumImages() to enforce that constraint by
00244    *  throwing away the appropriate argument and passing your own
00245    *  value to the superclass implementation.*/
00246   virtual void doEvent();
00247   
00248   //!@name LoadSave interface
00249   //@{
00250   virtual unsigned int getBinSize() const; //!< LoadSave override; presumably reports the buffer space saveBuffer() needs for the selected image -- confirm against LoadSave
00251 
00252   virtual unsigned int loadBuffer(const char buf[], unsigned int len, const char* filename=NULL); //!< LoadSave override for the common "FbkImage" header described in the class documentation; the loaded image's layer and channel replace the current selection (see selectSaveImage())
00253 
00254   virtual unsigned int saveBuffer(char buf[], unsigned int len) const; //!< LoadSave override; writes the common "FbkImage" header (width, height, layer, channel) for the image chosen with selectSaveImage(), subclasses then append their own header and data
00255 
00256   //! Not actually part of the LoadSave interface, but allows you to select which image of the bank will be saved
00257   /*! Calling this will also cause the image data for that image to be calculated,
00258    *  otherwise saveBuffer won't have up-to-date data to save.
00259    *  
00260    *  When loading, the saved image's layer and channel will reset this */
00261   virtual void selectSaveImage(unsigned int layer, unsigned int channel) { selectedSaveLayer=layer; selectedSaveChannel=channel; getImage(layer,channel);}
00262 
00263   virtual unsigned int getSelectedSaveLayer() const { return selectedSaveLayer; } //!< returns layer to be saved, or layer of last image loaded
00264   virtual unsigned int getSelectedSaveChannel() const { return selectedSaveChannel; } //!< returns channel to be saved, or channel of last image loaded
00265 
00266   //@}
00267 
00268 
00269 protected:
00270   //! constructor, separate class and instance names, with a raw event specification, excluding type; typically for stages which reference the previous stage's data
00271   FilterBankGenerator(const std::string& instancename, EventBase::EventGeneratorID_t mgid, unsigned int msid, EventBase::EventGeneratorID_t srcegid, unsigned int srcsrc)
00272     : EventGeneratorBase(instancename, mgid, msid, srcegid, srcsrc),
00273       src(NULL), numLayers(0), numChannels(0), widths(NULL), heights(NULL), skips(NULL),
00274       strides(NULL), increments(NULL), images(NULL), imageValids(NULL), selectedSaveLayer(0),
00275       selectedSaveChannel(0), frameNumber(0), framesProcessed(0)
00276   { }
00277 
00278   //! constructor, separate class and instance names, with a raw event specification, including type; typically for stages which will store their own copy of the data
00279   FilterBankGenerator(const std::string& instancename, EventBase::EventGeneratorID_t mgid, unsigned int msid, EventBase::EventGeneratorID_t srcegid, unsigned int srcsrc, EventBase::EventTypeID_t srcetid)
00280     : EventGeneratorBase(instancename, mgid, msid, srcegid, srcsrc, srcetid),
00281       src(NULL), numLayers(0), numChannels(0), widths(NULL), heights(NULL), skips(NULL),
00282       strides(NULL), increments(NULL), images(NULL), imageValids(NULL), selectedSaveLayer(0),
00283       selectedSaveChannel(0), frameNumber(0), framesProcessed(0)
00284   { }
00285 
00286   //! constructor, separate class and instance names, with a filter bank source, passes on all types; typically for stages which reference the previous stage's data
00287   FilterBankGenerator(const std::string& instancename, EventBase::EventGeneratorID_t mgid, unsigned int msid, FilterBankGenerator * fbgsrc)
00288     : EventGeneratorBase(instancename, mgid, msid, fbgsrc!=NULL?fbgsrc->getGeneratorID():EventBase::numEGIDs, fbgsrc!=NULL?fbgsrc->getSourceID():0),
00289       src(fbgsrc), numLayers(0), numChannels(0), widths(NULL), heights(NULL), skips(NULL),
00290       strides(NULL), increments(NULL), images(NULL), imageValids(NULL), selectedSaveLayer(0),
00291       selectedSaveChannel(0), frameNumber(0), framesProcessed(0)
00292   {
00293     if(src!=NULL)
00294       setNumImages(src->getNumLayers(),src->getNumChannels()); //note: during base-class construction this runs FilterBankGenerator::setNumImages(), a subclass override is not reached
00295   }
00296 
00297   //! constructor, separate class and instance names, with a filter bank source, accepts a particular type; typically for stages which will store their own data
00298   FilterBankGenerator(const std::string& instancename, EventBase::EventGeneratorID_t mgid, unsigned int msid, FilterBankGenerator * fbgsrc, EventBase::EventTypeID_t etid)
00299     : EventGeneratorBase(instancename, mgid, msid, fbgsrc!=NULL?fbgsrc->getGeneratorID():EventBase::numEGIDs, fbgsrc!=NULL?fbgsrc->getSourceID():0,etid),
00300       src(fbgsrc), numLayers(0), numChannels(0), widths(NULL), heights(NULL), skips(NULL),
00301       strides(NULL), increments(NULL), images(NULL), imageValids(NULL), selectedSaveLayer(0),
00302       selectedSaveChannel(0), frameNumber(0), framesProcessed(0)
00303   {
00304     if(src!=NULL)
00305       setNumImages(src->getNumLayers(),src->getNumChannels()); //note: during base-class construction this runs FilterBankGenerator::setNumImages(), a subclass override is not reached
00306   }
00307 
00308   //! resizes the filter bank information storage area, you should override this to do your setup and call it from your constructor
00309   /*! In general, it isn't expected that FilterBankGenerator's should
00310    *  necessarily be dynamically resizeable (although it would be
00311    *  nice), which is why this isn't public.  If yours is, just add
00312    *  some public accessor functions which call this.  In general, the
00313    *  included subclasses should be able to handle being resized, but
00314    *  there's no reason to do so since the system won't be changing
00315    *  its available resolutions at run time. 
00316    *
00317    *  The default implementation is a no-op if(numLayers==nLayers && numChannels==nChannels)
00318    */
00319   virtual void setNumImages(unsigned int nLayers, unsigned int nChannels);
00320 
00321   //! resets width and height parameters to that of the #src
00322   /*! You'll probably want to override this to also set #skips and #strides */
00323   virtual void setDimensions();
00324   
00325   //! create new image data storage area for the cache - this called by getImage() only when the corresponding entry in images is NULL
00326   /*! You should return the pointer you want stored in images to be
00327    *  returned by any calls to getFirstRow.  Interpretation of the
00328    *  data it points to is dependent on the generator which
00329    *  creates it */
00330   virtual unsigned char * createImageCache(unsigned int layer, unsigned int channel) const=0;
00331 
00332   //! should calculate new image data, called by getImage() only when #imageValids indicates the image being requested is dirty (and only after getImage() has already called createImageCache())
00333   /*! This is where you'll want to put your user-specific code for calculating the image data */
00334   virtual void calcImage(unsigned int layer, unsigned int channel) =0;
00335 
00336   //! deletes the arrays
00337   virtual void destruct();
00338 
00339   //! updates the image data to make sure it's up to date with what's available from the source
00340   /*! If someone calls getImage on a stage which hadn't been listening for
00341    *  events (an optimization to save time when it doesn't have any listeners
00342    *  of its own -- see EventGeneratorBase), then this will retroactively
00343    *  pull image data from the source even though the event for it was missed
00344    *
00345    *  @return false if no image data is available yet, true otherwise*/
00346   virtual bool refresh();
00347 
00348 
00349   FilterBankGenerator * src; //!< the generator of the last FilterBankEvent received
00350 
00351   unsigned int numLayers;   //!< current number of layers available
00352   unsigned int numChannels; //!< current number of channels available
00353 
00354   unsigned int * widths;    //!< an array of size numLayers, width (in samples) of each layer
00355   unsigned int * heights;   //!< an array of size numLayers, height (in samples) of each layer
00356   unsigned int * skips;     //!< an array of size numLayers, skip (in bytes) from row end to next row begin
00357   unsigned int * strides;   //!< an array of size numLayers, stride (in bytes) from a given column in one row to the same column in the next row
00358   unsigned int * increments;//!< an array of size numLayers, increment (in bytes) to use to get from one sample to the next
00359   
00360   mutable unsigned char *** images; //!< an array [numLayers][numChannels], stores pointer to cached image data
00361   mutable bool ** imageValids;      //!< an array [numLayers][numChannels], entry is true if cached data is still valid
00362 
00363   unsigned int selectedSaveLayer;   //!< layer to be manipulated with the LoadSave functions
00364   unsigned int selectedSaveChannel; //!< channel to be manipulated with the LoadSave functions
00365 
00366   //! the frame number of last frame received by doEvent - subclasses will need to set to the source's frameNumber if they don't call FilterBankGenerator::doEvent()
00367   /*! The idea is to use this as a unique serial number for each
00368    *  frame.  That way you can know if the current image in different
00369    *  generators is actually the same camera image before you try to
00370    *  compare or combine them.
00371    *
00372    *  You could also figure out the number of dropped frames by
00373    *  subtracting framesProcessed from this value.  Give some leeway
00374    *  however, because it takes the first 40-70 frames just to boot up
00375    *  (when running on the aibo), so there's no way they can be
00376    *  processed.
00377    */
00378   unsigned int frameNumber; 
00379 
00380   //! the current frame number available from the system - subclasses which receive data directly from the system should set this (and should not use EventGeneratorBase's auto-listen to ensure this is accurate)
00381   static unsigned int sysFrameNumber;
00382 
00383   //! subclasses should increment this any time they make a new filter bank available
00384   /*! this is automatically incremented if you use the FilterBankGenerator::doEvent() */
00385   unsigned int framesProcessed; 
00386 
00387 private:
00388   FilterBankGenerator(const FilterBankGenerator& fbk); //!< private to forbid copying -- don't call
00389   const FilterBankGenerator& operator=(const FilterBankGenerator& fbk); //!< private to forbid assignment -- don't call
00390 };
00391 
00392 /*! @file
00393  * @brief Describes abstract base class for generators of FilterBankEvent's
00394  * @author ejt (Creator)
00395  */
00396 
00397 #endif