Tekkotsu Homepage | Demos | Overview | Downloads | Dev. Resources | Reference | Credits |
PitchDetector.hGo to the documentation of this file.00001 //-*-c++-*- 00002 #ifndef INCLUDED_PitchDetector_h_ 00003 #define INCLUDED_PitchDetector_h_ 00004 00005 #include "Events/EventGeneratorBase.h" 00006 #include "Shared/newmat/newmat.h" 00007 #include <cmath> 00008 00009 //! Generates a PitchEvent whenever a notable frequency is detected using FFT 00010 class PitchDetector : public EventGeneratorBase { 00011 public: 00012 static const unsigned int fft_frames; //!< number frames to use for fft 00013 static const unsigned int num_pitches; //!< 5 octaves, 12 pitches per octave 00014 static const float base_pitch; //!< two octaves below tuning A (440) 00015 static const float half_step; //!< twelfth root of two: @f$ \sqrt[12]{2} @f$ 00016 static const float sqrt_2_pi; //!< set to @f$ \sqrt{2\pi} @f$ 00017 00018 //! constructor 00019 PitchDetector() : EventGeneratorBase("PitchDetector",EventBase::micPitchEGID,reinterpret_cast<size_t>(this),EventBase::micOSndEGID), 00020 left(), right(), iml(), imr(), rel(), rer(), pol(), por(), po(), 00021 frame_sz(0), win_sz(0), rate(0), cur_frame(0), local_maxes(0), 00022 pitch_info(NULL), pitch_bin(NULL), have_fft(false) //,fft_file(0) 00023 { 00024 //fft_file = fopen("ms/data/sound/fft.dat", "w"); 00025 } 00026 00027 //! destructor, asserts that #pitch_info and #pitch_bin have been deleted by doStop() 00028 ~PitchDetector(); 00029 00030 virtual void doStart(); //!< allocates and sets up #pitch_info and #pitch_bin, relies on EventGeneratorBase to manage event subscriptions 00031 virtual void doStop(); //!< deletes #pitch_info and #pitch_bin 00032 virtual void doEvent(); 00033 00034 static std::string getClassDescription() { return "Generates a PitchEvent whenever a notable frequency is detected using FFT"; } 00035 virtual std::string getDescription() const { return getClassDescription(); } 00036 00037 00038 protected: 00039 //! stores info about the pitch currently being detected 00040 struct PitchInfo { 00041 //! constructor -- sets everything to 0 (additional initialization is done for each #pitch_info entry during doStart()) 00042 PitchInfo() : freq(0), sigma(0), local_max(0), global_max(0), overtone(0), confidence(0), amplitude(0), duration(0) {} 00043 00044 float freq, //!< frequency of this pitch (calculated on instantiation) 00045 sigma,//!< standard dev to use (sqrt(var)), calc'd on start 00046 //following set only relevant if the pitch is on 00047 local_max, //!< [0,1] value: how much stronger it is than neighbors, else zero 00048 global_max, //!< [0,1] value: how much stronger than mean if global max, else zero 00049 overtone, //!< value in [0,1] with confidence that it is overtone 00050 confidence, //!< value in [0,1] with confidence that this is a pitch, not noise (should confidence have a rolling average as well?) 00051 amplitude; //!< rolling average amplitude 00052 unsigned int duration; //!< number of windows it has been on 00053 }; 00054 00055 //keep temporaries for efficiency 00056 NEWMAT::ColumnVector left, //!< the waveform of the left channel 00057 right, //!< the waveform of the right channel 00058 iml, //!< imaginary outputs of the FFT for left channel 00059 imr, //!< imaginary outputs of the FFT for right channel 00060 rel, //!< real outputs of the FFT for the left channel 00061 rer, //!< real outputs of the FFT for the right channel 00062 pol, //!< absolute value (magnitude) of results of FFT for left channel 00063 por, //!< absolute value (magnitude) of results of FFT for right channel 00064 po; //!< average of #pol and #por for each bin 00065 unsigned int frame_sz, //!< number of samples given by system for each frame (assumed that once started, this won't change) 00066 win_sz, //!< number of samples to be recorded before running FFT (#frame_sz * #fft_frames) 00067 rate, //!< sampling frequence (Hz) 00068 cur_frame, //!< the current frame index to be filled in next, up to #fft_frames, when the FFT is run 00069 local_maxes; //!< number of individual peaks 00070 PitchInfo *pitch_info; //!< an array of PitchInfos, one for each of #num_pitches, allocated for scope spanning doStart()/doStop() 00071 float *pitch_bin; //!< array, holds current amplitude for each #num_pitches (mono) 00072 bool have_fft; //!< set to true after FFT has been computed, differentiates first filling of buffers from subsequent rewrites 00073 //FILE *fft_file; 00074 00075 //! returns true if the confidence is above a threshold obtained from configuration 00076 static bool is_pitch(float conf); 00077 00078 //! returns a confidence estimate of a pitch in bin @a p... (actual computation used is not obvious, you'll have to stare at the code /ejt) 00079 inline float confidence(unsigned int p, float strength) { 00080 float l = pitch_info[p].local_max, 00081 g = pitch_info[p].global_max, 00082 o = pitch_info[p].overtone; 00083 //XXX this sucks. add variance 00084 00085 if (strength < win_sz * 5.0f) 00086 return 0.0f; 00087 00088 if (g > 0.0f) 00089 return 3*g / 4.0f + (1.0f - o) / 8.0f + (1.0f - 2.0f * local_maxes / num_pitches) / 8.0f; 00090 else 00091 return l / 3.0f + (1.0f - o) / 4.0f + (1.0f - 2.0f * local_maxes / num_pitches) / 8.0f; 00092 } 00093 00094 //! returns the value at @a x of a gaussian with the parameters @a mu and @a sigma 00095 static inline float gaussian_pdf(float mu, float sigma, float x) { 00096 float dist = x - mu; 00097 return std::exp(-dist * dist / (2.0f * sigma * sigma)) / (sqrt_2_pi * sigma); 00098 } 00099 00100 //! returns a string representing the musical note of a given frequency 00101 static const char *pitch_name(unsigned int i) { 00102 static const char *pitch_names[12] = { 00103 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" 00104 }; 00105 00106 return pitch_names[i % 12]; 00107 } 00108 00109 //! unknown calculation, does some kind of rescaling of @a v (/ejt) 00110 static void hamming(NEWMAT::ColumnVector &v) { 00111 for (int i = 0; i != v.size(); ++i) 00112 v.element(i) = v.element(i) * (0.53836f - 0.46164f * cosf(2 * float(M_PI) * i / (v.size() - 1))); 00113 } 00114 00115 private: 00116 PitchDetector(const PitchDetector&); //!< don't call (copy constructor) 00117 PitchDetector& operator=(const PitchDetector&); //!< don't call (assignment operator) 00118 }; 00119 00120 /*! @file 00121 * @brief Defines PitchDetector, which generates a PitchEvent whenever a notable frequency is detected using FFT 00122 * @author Matus Telgarsky and Jonah Sherman (Creators) 00123 * @author Ethan Tira-Thompson (imported into framework) 00124 * 00125 * Originally written as a part of a final project at Carnegie Mellon (15-494 Cognitive Robotics, Spring 2006) 00126 */ 00127 00128 #endif |
Tekkotsu v5.1CVS |
Generated Mon May 9 04:58:46 2016 by Doxygen 1.6.3 |