Tekkotsu Homepage
Demos
Overview
Downloads
Dev. Resources
Reference
Credits

PitchDetector.cc

Go to the documentation of this file.
00001 #include "PitchDetector.h"
00002 #include "Events/EventRouter.h"
00003 #include "Events/EventBase.h"
00004 #include "Events/DataEvent.h"
00005 #include "Events/PitchEvent.h"
00006 #include "Shared/newmat/newmatap.h"
00007 #include "Shared/Config.h"
00008 #include "Shared/debuget.h"
00009 
00010 #include "Shared/ODataFormats.h"
00011 #ifdef PLATFORM_APERIOS
00012 #  include "OPENR/OPENRAPI.h"
00013 #endif
00014 
00015 //better to put this here instead of the header
00016 using namespace std; 
00017 
00018 const unsigned int PitchDetector::fft_frames = 4; // number of microphone frames accumulated into each FFT window (win_sz = frame_sz * fft_frames)
00019 const unsigned int PitchDetector::num_pitches = 60; // size of the pitch table: 5 octaves of 12 half steps
00020 const float PitchDetector::base_pitch = 110.0f; // frequency of pitch index 0: two octaves below tuning A (440)
00021 const float PitchDetector::half_step = 1.0594630943593f; // frequency ratio between adjacent pitch indices: twelfth root of two
00022 const float PitchDetector::sqrt_2_pi = 2.506628274631f; // \sqrt{2\pi}; not referenced in this file, NOTE(review): presumably used by gaussian_pdf in the header -- confirm
00023 
00024 PitchDetector::~PitchDetector() {
00025   ASSERT(pitch_info==NULL,"pitch_info wasn't deleted before destructor");
00026   ASSERT(pitch_bin==NULL,"pitch_bin wasn't deleted before destructor");
00027 }
00028 
00029 void PitchDetector::DoStart() {
00030   EventGeneratorBase::DoStart(); // do this first (required)
00031   
00032   ASSERT(pitch_info==NULL,"pitch_info was already allocated?");
00033   ASSERT(pitch_bin==NULL,"pitch_bin was already allocated?");
00034   
00035   pitch_info = new PitchInfo[num_pitches];
00036   for (unsigned int i = 0; i != num_pitches; ++i) {
00037     float freq = base_pitch * powf(half_step, i);
00038     pitch_info[i].freq = freq;
00039     pitch_info[i].sigma = sqrtf((freq * half_step - freq) / 0.5f);
00040     pitch_info[i].duration = 0;
00041   }
00042 
00043   // processEvent will initialize before use
00044   pitch_bin = new float[num_pitches];
00045 
00046   cur_frame = 0;
00047   have_fft = false;
00048   //printf("writing to file..\n");
00049   //fprintf(fft_file, "\n======starting fft collection=======\n");
00050   //printf("done writing to file..\n");
00051 }
00052 
00053 void PitchDetector::DoStop() {
00054   //fclose(fft_file);
00055   if(pitch_info!=NULL) {
00056     delete [] pitch_info;
00057     pitch_info=NULL;
00058   }
00059   if(pitch_bin!=NULL) {
00060     delete [] pitch_bin;
00061     pitch_bin=NULL;
00062   }
00063   EventGeneratorBase::DoStop(); // do this last (required)
00064 }
00065 
00066 void PitchDetector::processEvent(const EventBase& event) {
00067   if( event.getGeneratorID() != EventBase::micOSndEGID) {
00068     EventGeneratorBase::processEvent(event);
00069     return;
00070   }
00071   
00072   // Get to the sound buffer
00073   // getData() is not specified for const data
00074   unsigned int i, j;
00075   const DataEvent<const OSoundVectorData*> *de = reinterpret_cast<const DataEvent<const OSoundVectorData*>*>( &event);
00076   
00077   OSoundVectorData *svd = const_cast<OSoundVectorData*>(de->getData());
00078   const short *d = ( const short *)svd->GetData(0);
00079   
00080   if ( ! frame_sz ) { /* we need to initialize _everything_ */
00081     //printf("building vectors for first time..\n");
00082     frame_sz = svd->GetInfo(0)->frameSize;
00083     rate = svd->GetInfo(0)->samplingRate;
00084     //printf("frame_sz %d, rate %d\n",frame_sz,rate);
00085     win_sz = frame_sz * fft_frames;
00086     
00087     left.ReSize(win_sz);
00088     right.ReSize(win_sz);
00089     iml.ReSize(win_sz / 2 + 1);
00090     imr.ReSize(win_sz / 2 + 1);
00091     rel.ReSize(win_sz / 2 + 1);
00092     rer.ReSize(win_sz / 2 + 1);
00093     pol.ReSize(win_sz / 2 + 1);
00094     por.ReSize(win_sz / 2 + 1);
00095     po.ReSize(win_sz / 2 + 1);
00096   }
00097   
00098   //printf("saving audio data to vectors [%u]..\n", cur_frame);
00099   for (i = 0; i != frame_sz; ++i) {
00100     left ((cur_frame * frame_sz) + i + 1) = d[(i<<1)  ];
00101     right((cur_frame * frame_sz) + i + 1) = d[(i<<1)+1];
00102   }
00103   
00104   if (++cur_frame == fft_frames) {
00105     cur_frame = 0;
00106     
00107     hamming(left);
00108     hamming(right);
00109     
00110     //printf("calling fft!\n");
00111     NEWMAT::RealFFT(left, rel, iml);
00112     NEWMAT::RealFFT(right, rer, imr);
00113     for (i = 1; i <= win_sz / 2 + 1; ++i) {
00114       NEWMAT::Real a, b;
00115       a = rel(i);
00116       b = iml(i);
00117       pol(i) = sqrtf(a*a + b*b);
00118       a = rer(i);
00119       b = imr(i);
00120       por(i) = sqrtf(a*a + b*b);
00121       //based on whether stereo info actually used, remove all
00122       //stereo separation altogether or merge even later..
00123       po(i) = (pol(i) + por(i)) / 2;
00124       
00125       //fprintf(fft_file, "[frequency %f] pow[%d] = %f\n", rate * i * 1.0 / win_sz, i, po(i));
00126     }
00127     have_fft = true;
00128     
00129   } else if (cur_frame == 1 && have_fft) { //hack to split processing..
00130     float mean = 0.0f;
00131     unsigned int max = 0; //if we see this value twice.. oops
00132     //turbo slow for now..
00133     //printf("building pitch bins!\n");
00134     local_maxes = 0;
00135     for (i = 0; i != num_pitches; ++i) {
00136       float sigma = pitch_info[i].sigma;
00137       float freq = pitch_info[i].freq;
00138       float bin = 0.0f;
00139       
00140       for (j = 1; j <= win_sz / 2 + 1; ++j)
00141         bin += po(j) * gaussian_pdf(j * rate * 1.0 / win_sz, sigma, freq);
00142       mean += (pitch_bin[i] = bin);
00143       
00144       
00145       //prep for global max check
00146       max = (bin > pitch_bin[max]) ? i : max;
00147       
00148       //check if prev a local max
00149       pitch_info[i].local_max = pitch_info[i].global_max = 0.0f;
00150       if (i == 1) {
00151         float prev = pitch_bin[i-1];
00152         if (bin < prev) {
00153           pitch_info[0].local_max = 1.0f - (bin / prev);
00154           ++local_maxes;
00155         }
00156       } else if (i > 1) {
00157         float a = pitch_bin[i-2], b = pitch_bin[i-1], c = pitch_bin[i];
00158         if (b > a && b > c) {
00159           pitch_info[i-1].local_max = 1.0f - (a + c) / (2.f * b);
00160           ++local_maxes;
00161         }
00162       }
00163       if (i == num_pitches - 1) { //intentionally not else-if !
00164         float prev = pitch_bin[i - 1];
00165         if (bin > prev) {
00166           pitch_info[i].local_max = 1.0f - (prev / bin);
00167           ++local_maxes;
00168         }
00169       }
00170     }
00171     mean /= num_pitches;
00172     
00173     pitch_info[max].global_max = 1.0f - mean / pitch_bin[max];
00174     
00175     //final pass through on whether this is a pitch or not.
00176     //compute overtone properties and confidence..
00177     for (i = 0; i < num_pitches; ++i) {
00178       float c, f = 1.0f;
00179       if (i % 4 && is_pitch(confidence(i/4, pitch_bin[i/4])))
00180         f /= 2.0f;
00181       if (i % 3 && is_pitch(confidence(i/3, pitch_bin[i/3])))
00182         f /= 2.0f;
00183       if (i % 3 && is_pitch(confidence(i*2/3, pitch_bin[i*2/3])))
00184         f /= 2.0f;
00185       if (i % 2 && is_pitch(confidence(i/2, pitch_bin[i/2])))
00186         f /= 2.0f;
00187       pitch_info[i].overtone = 1.0f - f;
00188       pitch_info[i].confidence = (c = confidence(i, pitch_bin[i]));
00189       
00190       if (is_pitch(c)) {
00191         //printf("pitch number %u, frequency %f, name %s, confidence %f went on\nstrength %f gmax %f lmax %f otone %f lmaxes %u\n", i, pitch_info[i].freq, pitch_name(i), c, pitch_bin[i],pitch_info[i].global_max, pitch_info[i].local_max, pitch_info[i].overtone, local_maxes);
00192         EventBase::EventTypeID_t type = ( ! pitch_info[i].duration ) ? EventBase::activateETID : EventBase::statusETID;
00193         pitch_info[i].amplitude = (pitch_info[i].amplitude*pitch_info[i].duration + pitch_bin[i]) / (pitch_info[i].duration + 1);
00194         ++pitch_info[i].duration;
00195         erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), type, pitch_info[i].freq, pitch_name(i), pitch_bin[i], pitch_info[i].duration*win_sz*1000/rate, c));
00196       } else {
00197         if (pitch_info[i].duration) {
00198           //printf("pitch number %u, frequency %f, name %s, confidence %f, duration %u went off \n",i, pitch_info[i].freq, pitch_name(i), c, pitch_info[i].duration);
00199           erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), EventBase::deactivateETID,pitch_info[i].freq, pitch_name(i),pitch_info[i].amplitude,pitch_info[i].duration*win_sz*1000/rate,c));
00200           pitch_info[i].duration = 0;
00201           pitch_info[i].amplitude = 0;
00202         }
00203       }
00204       //fprintf(fft_file, "pitch %d freq %f name %s amp %f dur %u\n\tgmax %f lmax %f otone %f lmaxes %u confidence %f\n",i, pitch_info[i].freq, pitch_name(i),pitch_bin[i], pitch_info[i].duration,pitch_info[i].global_max, pitch_info[i].local_max,pitch_info[i].overtone, local_maxes, c);
00205     }
00206   }
00207   //printf("done with mic event in class Pitch\n");
00208 }
00209 
00210 bool PitchDetector::is_pitch(float conf) {
00211   return (conf >= config->sound.pitchConfidenceThreshold);
00212 }
00213 
00214 
00215 
00216 /*! @file
00217  * @brief Implements PitchDetector, which generates a PitchEvent whenever a notable frequency is detected using FFT
00218  * @author Matus Telgarsky and Jonah Sherman (Creators)
00219  * @author Ethan Tira-Thompson (imported into framework)
00220  *
00221  * Originally written as a part of a final project at Carnegie Mellon (15-494 Cognitive Robotics, Spring 2006)
00222  *
00223  * $Author: ejt $
00224  * $Name: tekkotsu-4_0 $
00225  * $Revision: 1.6 $
00226  * $State: Exp $
00227  * $Date: 2007/11/13 04:16:04 $
00228  */

Tekkotsu v4.0
Generated Thu Nov 22 00:54:54 2007 by Doxygen 1.5.4