sift.hpp

Go to the documentation of this file.
00001 // file:        sift.hpp
00002 // author:      Andrea Vedaldi
00003 // description: Sift declaration
00004 
00005 // AUTORIGHTS
00006 // Copyright (c) 2006 The Regents of the University of California
00007 // All Rights Reserved.
00008 // 
00009 // Created by Andrea Vedaldi (UCLA VisionLab)
00010 // 
00011 // Permission to use, copy, modify, and distribute this software and its
00012 // documentation for educational, research and non-profit purposes,
00013 // without fee, and without a written agreement is hereby granted,
00014 // provided that the above copyright notice, this paragraph and the
00015 // following three paragraphs appear in all copies.
00016 // 
00017 // This software program and documentation are copyrighted by The Regents
00018 // of the University of California. The software program and
00019 // documentation are supplied "as is", without any accompanying services
00020 // from The Regents. The Regents does not warrant that the operation of
00021 // the program will be uninterrupted or error-free. The end-user
00022 // understands that the program was developed for research purposes and
00023 // is advised not to rely exclusively on the program for any reason.
00024 // 
00025 // This software embodies a method for which the following patent has
00026 // been issued: "Method and apparatus for identifying scale invariant
00027 // features in an image and use of same for locating an object in an
00028 // image," David G. Lowe, US Patent 6,711,293 (March 23,
00029 // 2004). Provisional application filed March 8, 1999. Asignee: The
00030 // University of British Columbia.
00031 // 
00032 // IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
00033 // FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
00034 // INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND
00035 // ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN
00036 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF
00037 // CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
00038 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00039 // A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
00040 // BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE
00041 // MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
00042 
00043 #ifndef VL_SIFT_HPP
00044 #define VL_SIFT_HPP
00045 
#include <cmath>
#include <istream>
#include <limits>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

#include "Shared/attributes.h"
00052 
/* Select the type behind VL_FASTFLOAT: double only when VL_USEFASTMATH is
 * defined and VL_MAC is not defined; float in every other configuration. */
#if defined(VL_USEFASTMATH) && !defined(VL_MAC)
#  define VL_FASTFLOAT double
#else
#  define VL_FASTFLOAT float
#endif

/* Two-step stringification: VL_EXPAND_AND_STRINGIFY(text) macro-expands its
 * argument first and then hands the result to VL_XEAS, which turns it into a
 * string literal. Calling VL_XEAS directly stringifies the argument as
 * written, without expanding it. */
#define VL_XEAS(text) #text
#define VL_EXPAND_AND_STRINGIFY(text) VL_XEAS(text)
00065 
00066 /** @brief VisionLab namespace */
00067 namespace VL {
00068 
00069 /** @brief Pixel data type */
00070 typedef float pixel_t ;
00071 
00072 /** @brief Floating point data type 
00073  **
00074  ** Although floats are precise enough for this applicatgion, on Intel
00075  ** based architecture using doubles for floating point computations
00076  ** turns out to be much faster.
00077  **/
00078 typedef VL_FASTFLOAT float_t ;
00079 
00080 /** @brief 32-bit floating data type */
00081 typedef float float32_t ;
00082 
00083 /** @brief 64-bit floating data type */
00084 typedef double float64_t ;
00085 
/** @brief 32-bit integer data type
 **
 ** NOTE(review): assumes @c int is 32 bits wide on the target; nothing
 ** in this header checks it.
 **/
typedef int int32_t ;

// 64-bit integer data type (currently disabled):
// typedef long long int int64_t ;

/** @brief 32-bit unsigned integer data type
 **
 ** Declared as @c unsigned @c int so that the type really is unsigned,
 ** as its name and documentation state (it used to alias plain @c int).
 **/
typedef unsigned int uint32_t ;

/** @brief 8-bit unsigned integer data type */
typedef unsigned char uint8_t ;
00097 
00098 /** @name Fast math
00099  ** 
00100  ** We provide approximate mathematical functions. These are usually
00101  ** rather faster than the corresponding standard library functions.
00102  **/
00103 /*@{*/
00104 float   fast_resqrt(float x) ;
00105 double  fast_resqrt(double x) ;
00106 float_t fast_expn(float_t x) ;
00107 float_t fast_abs(float_t x) ;
00108 float_t fast_mod_2pi(float_t x) ;
00109 float_t fast_atan2(float_t y, float_t x) ;
00110 float_t fast_sqrt(float_t x) ;
00111 int32_t fast_floor(float_t x) ;
00112 /*@}*/
00113 
00114 /** @brief Generic exception */
00115 struct
00116 EXPORT_SYMBOL
00117  Exception
00118 {
00119   /** @brief Build generic exception with message
00120    ** 
00121    ** The message can be accessed as the Exception::msg data member.
00122    **
00123    ** @param _msg message.
00124    **/
00125   Exception(std::string _msg) : msg(_msg) { }
00126 
00127   /** Exception message */
00128   std::string msg ; 
00129 } ;
00130 
/** @brief Throw generic exception
 **
 ** The macro executes the stream operations @a x to build an error
 ** message. The message is then wrapped in a generic exception
 ** VL::Exception and thrown.
 **
 ** The expansion is a braced block, not a single statement, so
 ** `if (c) VL_THROW(...) ; else ...` does not compile; wrap the call
 ** in braces in that situation. The block form is kept so that call
 ** sites written without a trailing semicolon keep compiling.
 **
 ** The temporary stream has a deliberately unusual name so that an
 ** expression @a x mentioning a caller variable (for instance one
 ** called @c oss) is not silently captured by the macro's own local.
 **
 ** Requires std::ostringstream (header <sstream>) to be visible at
 ** the point of use.
 **
 ** @param x sequence of stream operations.
 **/
#define VL_THROW(x)                                   \
  {                                                   \
    std::ostringstream vl_throw_message_ ;            \
    vl_throw_message_ << x ;                          \
    throw VL::Exception(vl_throw_message_.str()) ;    \
  }
00145 
00146 /** @name PGM input/output */
00147 /*@{*/
00148 /** @brief PGM buffer descriptor
00149  **
00150  ** The structure describes a gray scale image and it is used by the
00151  ** PGM input/output functions. The fileds are self-explanatory.
00152  **/
00153 struct PgmBuffer
00154 {
00155   int width ;     ///< Image width
00156   int height ;    ///< Image hegith
00157   pixel_t* data ; ///< Image data
00158 } ;
00159 std::ostream& insertPgm(std::ostream&, pixel_t const* im, int width, int height) ;
00160 std::istream& extractPgm(std::istream&, PgmBuffer& buffer) ;
00161 /** createPgmBufferFromArray added by Xinghao Pan on 14 Apr, 2008 **/
00162 void createPgmBufferFromArray(int w, int h, pixel_t* d, PgmBuffer& buffer) ;
00163 /*@}*/
00164 
00165 /** @brief SIFT filter
00166  **
00167  ** This class is a filter computing the Scale Invariant Feature
00168  ** Transform (SIFT).
00169  **/
00170 class Sift
00171 {
00172 
00173 public:
00174   
00175   /** @brief SIFT keypoint
00176    **
00177    ** A SIFT keypoint is charactedized by a location x,y and a scale
00178    ** @c sigma. The scale is obtained from the level index @c s and
00179    ** the octave index @c o through a simple formula (see the PDF
00180    ** documentation).
00181    **
00182    ** In addition to the location, scale indexes and scale, we also
00183    ** store the integer location and level. The integer location is
00184    ** unnormalized, i.e. relative to the resolution of the octave
00185    ** containing the keypoint (octaves are downsampled). 
00186    **/
00187   struct Keypoint
00188   {
00189     int o ;    ///< Keypoint octave index
00190 
00191     int ix ;   ///< Keypoint integer X coordinate (unnormalized)
00192     int iy ;   ///< Keypoint integer Y coordinate (unnormalized)
00193     int is ;   ///< Keypoint integer scale indiex
00194 
00195     float_t x  ;  ///< Keypoint fractional X coordinate
00196     float_t y  ;  ///< Keypoint fractional Y coordinate
00197     float_t s ;   ///< Keypoint fractional scale index
00198 
00199     float_t sigma ;  ///< Keypoint scale
00200   } ; 
00201 
00202   typedef std::vector<Keypoint>     Keypoints ;          ///< Keypoint list datatype
00203   typedef Keypoints::iterator       KeypointsIter ;      ///< Keypoint list iter datatype
00204   typedef Keypoints::const_iterator KeypointsConstIter ; ///< Keypoint list const iter datatype
00205 
00206   /** @brief Constructors and destructors */
00207   /*@{*/
00208   Sift(const pixel_t* _im_pt, int _width, int _height,
00209        float_t _sigman,
00210        float_t _sigma0,
00211        int _O, int __S,
00212        int _omin, int _smin, int _smax) ;
00213   ~Sift() ;
00214   /*@}*/
00215 
00216   void process(const pixel_t* _im_pt, int _width, int _height) ;
00217 
00218   /** @brief Querying the Gaussian scale space */
00219   /*@{*/
00220   VL::pixel_t* getOctave(int o) ;
00221   VL::pixel_t* getLevel(int o, int s) ;
00222   int          getWidth() const ;
00223   int          getHeight() const ;
00224   int          getOctaveWidth(int o) const ;
00225   int          getOctaveHeight(int o) const ;
00226   VL::float_t  getOctaveSamplingPeriod(int o) const ;
00227   VL::float_t  getScaleFromIndex(VL::float_t o, VL::float_t s) const ;
00228   Keypoint     getKeypoint(VL::float_t x, VL::float_t y, VL::float_t s) const ;
00229   /*@}*/
00230 
00231   /** @brief Descriptor parameters */
00232   /*@{*/
00233   bool getNormalizeDescriptor() const ;
00234   void setNormalizeDescriptor(bool) ;
00235   void setMagnification(VL::float_t) ;
00236   VL::float_t getMagnification() const ;  
00237   /*@}*/
00238 
00239   /** @brief Detector and descriptor */
00240   /*@{*/
00241   void detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold) ;
00242   int computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint) ; 
00243   void computeKeypointDescriptor(VL::float_t* descr_pt, Keypoint keypoint, VL::float_t angle) ;
00244   KeypointsIter keypointsBegin() ;
00245   KeypointsIter keypointsEnd() ;
00246   /*@}*/
00247     
00248 private:
00249   void prepareBuffers() ;
00250   void freeBuffers() ;
00251   void smooth(VL::pixel_t       * dst, 
00252         VL::pixel_t       * temp, 
00253               VL::pixel_t const * src, int width, int height, 
00254               VL::float_t s) ;
00255 
00256   void prepareGrad(int o) ;
00257   
00258   // scale space parameters
00259   VL::float_t sigman ;
00260   VL::float_t sigma0 ;
00261   VL::float_t sigmak ;
00262 
00263   int O ;
00264   int S ; 
00265   int omin ;
00266   int smin ; 
00267   int smax ;
00268 
00269   int width ;
00270   int height ;
00271 
00272   // descriptor parameters
00273   VL::float_t magnif ;
00274   bool        normalizeDescriptor ;
00275 
00276   // buffers
00277   VL::pixel_t*  temp ;
00278   int           tempReserved ;
00279   bool          tempIsGrad  ;
00280   int           tempOctave ;
00281   VL::pixel_t** octaves ;
00282   
00283   VL::pixel_t*  filter ;
00284   int           filterReserved ;
00285 
00286   Keypoints keypoints ;
00287 
00288 private:
00289   Sift(const Sift&); // Do not use
00290   Sift& operator=(const Sift&); // Do not use
00291 } ;
00292 
00293 
00294 }
00295 
00296 // Include inline functions definitions
00297 #include "sift.ipp"
00298 
00299 // VL_SIFT_HPP
00300 #endif