Tekkotsu Homepage
Demos
Overview
Downloads
Dev. Resources
Reference
Credits

PitchDetector.h

Go to the documentation of this file.
00001 //-*-c++-*-
00002 #ifndef INCLUDED_PitchDetector_h_
00003 #define INCLUDED_PitchDetector_h_
00004 
00005 #include "Events/EventGeneratorBase.h"
00006 #include "Shared/newmat/newmat.h"
00007 #include <cmath>
00008 
00009 //! Generates a PitchEvent whenever a notable frequency is detected using FFT
00010 class PitchDetector : public EventGeneratorBase {
00011 public:
00012   static const unsigned int fft_frames; //!< number frames to use for fft
00013   static const unsigned int num_pitches; //!< 5 octaves, 12 pitches per octave
00014   static const float base_pitch; //!< two octaves below tuning A (440)
00015   static const float half_step; //!< twelfth root of two: @f$ \sqrt[12]{2} @f$
00016   static const float sqrt_2_pi; //!< set to @f$ \sqrt{2\pi} @f$
00017   
00018   //! constructor
00019   PitchDetector() : EventGeneratorBase("PitchDetector",EventBase::micPitchEGID,reinterpret_cast<size_t>(this),EventBase::micOSndEGID), 
00020     left(), right(), iml(), imr(), rel(), rer(), pol(), por(), po(), 
00021     frame_sz(0), win_sz(0), rate(0), cur_frame(0), local_maxes(0),
00022     pitch_info(NULL), pitch_bin(NULL), have_fft(false) //,fft_file(0)
00023   {
00024     //fft_file = fopen("ms/data/sound/fft.dat", "w");
00025   } 
00026   
00027   //! destructor, asserts that #pitch_info and #pitch_bin have been deleted by doStop()
00028   ~PitchDetector();
00029   
00030   virtual void doStart(); //!< allocates and sets up #pitch_info and #pitch_bin, relies on EventGeneratorBase to manage event subscriptions
00031   virtual void doStop(); //!< deletes #pitch_info and #pitch_bin
00032   virtual void doEvent();
00033 
00034   static std::string getClassDescription() { return "Generates a PitchEvent whenever a notable frequency is detected using FFT"; }
00035   virtual std::string getDescription() const { return getClassDescription(); }
00036   
00037 
00038 protected:
00039   //! stores info about the pitch currently being detected
00040   struct PitchInfo {
00041     //! constructor -- sets everything to 0 (additional initialization is done for each #pitch_info entry during doStart())
00042     PitchInfo() : freq(0), sigma(0), local_max(0), global_max(0), overtone(0), confidence(0), amplitude(0), duration(0) {}
00043     
00044     float freq, //!< frequency of this pitch (calculated on instantiation)
00045     sigma,//!< standard dev to use (sqrt(var)), calc'd on start
00046     //following set only relevant if the pitch is on
00047     local_max, //!< [0,1] value: how much stronger it is than neighbors, else zero
00048     global_max, //!< [0,1] value: how much stronger than mean if global max, else zero
00049     overtone, //!< value in [0,1] with confidence that it is overtone            
00050     confidence, //!< value in [0,1] with confidence that this is a pitch, not noise (should confidence have a rolling average as well?)
00051     amplitude; //!< rolling average amplitude
00052     unsigned int duration; //!< number of windows it has been on 
00053   };
00054   
00055   //keep temporaries for efficiency
00056   NEWMAT::ColumnVector left, //!< the waveform of the left channel 
00057     right, //!< the waveform of the right channel
00058     iml, //!< imaginary outputs of the FFT for left channel
00059     imr, //!< imaginary outputs of the FFT for right channel
00060     rel, //!< real outputs of the FFT for the left channel
00061     rer, //!< real outputs of the FFT for the right channel
00062     pol, //!< absolute value (magnitude) of results of FFT for left channel
00063     por, //!< absolute value (magnitude) of results of FFT for right channel
00064     po; //!< average of #pol and #por for each bin
00065   unsigned int frame_sz, //!< number of samples given by system for each frame (assumed that once started, this won't change)
00066     win_sz, //!< number of samples to be recorded before running FFT (#frame_sz * #fft_frames)
00067     rate, //!< sampling frequence (Hz)
00068     cur_frame, //!< the current frame index to be filled in next, up to #fft_frames, when the FFT is run
00069     local_maxes; //!< number of individual peaks
00070   PitchInfo *pitch_info; //!< an array of PitchInfos, one for each of #num_pitches, allocated for scope spanning doStart()/doStop()
00071   float *pitch_bin; //!< array, holds current amplitude for each #num_pitches (mono)
00072   bool have_fft; //!< set to true after FFT has been computed, differentiates first filling of buffers from subsequent rewrites
00073   //FILE *fft_file;
00074   
00075   //! returns true if the confidence is above a threshold obtained from configuration
00076   static bool is_pitch(float conf);
00077   
00078   //! returns a confidence estimate of a pitch in bin @a p... (actual computation used is not obvious, you'll have to stare at the code /ejt)
00079   inline float confidence(unsigned int p, float strength) {
00080     float l = pitch_info[p].local_max,
00081     g = pitch_info[p].global_max,
00082     o = pitch_info[p].overtone;
00083     //XXX this sucks. add variance
00084     
00085     if (strength < win_sz * 5.0f)
00086       return 0.0f;
00087     
00088     if (g > 0.0f)
00089       return 3*g / 4.0f + (1.0f - o) / 8.0f + (1.0f - 2.0f * local_maxes / num_pitches) / 8.0f;
00090     else
00091       return l / 3.0f + (1.0f - o) / 4.0f + (1.0f - 2.0f * local_maxes / num_pitches) / 8.0f;
00092   }
00093   
00094   //! returns the value at @a x of a gaussian with the parameters @a mu and @a sigma
00095   static inline float gaussian_pdf(float mu, float sigma, float x) {
00096     float dist = x - mu;
00097     return std::exp(-dist * dist / (2.0f * sigma * sigma)) / (sqrt_2_pi * sigma);
00098   }
00099   
00100   //! returns a string representing the musical note of a given frequency
00101   static const char *pitch_name(unsigned int i) {
00102     static const char *pitch_names[12] = {
00103       "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
00104     };
00105     
00106     return pitch_names[i % 12];
00107   }
00108   
00109   //! unknown calculation, does some kind of rescaling of @a v (/ejt)
00110   static void hamming(NEWMAT::ColumnVector &v) {
00111     for (int i = 0; i != v.size(); ++i)
00112       v.element(i) = v.element(i) * (0.53836f - 0.46164f * cosf(2 * float(M_PI) * i / (v.size() - 1)));
00113   }
00114   
00115 private:
00116   PitchDetector(const PitchDetector&); //!< don't call (copy constructor)
00117   PitchDetector& operator=(const PitchDetector&); //!< don't call (assignment operator)
00118 };
00119 
00120 /*! @file
00121  * @brief Defines PitchDetector, which generates a PitchEvent whenever a notable frequency is detected using FFT
00122  * @author Matus Telgarsky and Jonah Sherman (Creators)
00123  * @author Ethan Tira-Thompson (imported into framework)
00124  *
00125  * Originally written as a part of a final project at Carnegie Mellon (15-494 Cognitive Robotics, Spring 2006)
00126  */
00127 
00128 #endif

Tekkotsu v5.1CVS
Generated Mon May 9 04:58:46 2016 by Doxygen 1.6.3