PitchDetector.cc
Go to the documentation of this file.00001 #include "PitchDetector.h"
00002 #include "Events/EventRouter.h"
00003 #include "Events/EventBase.h"
00004 #include "Events/DataEvent.h"
00005 #include "Events/PitchEvent.h"
00006 #include "Shared/newmat/newmatap.h"
00007 #include "Shared/Config.h"
00008 #include "Shared/debuget.h"
00009
00010 #include "Shared/ODataFormats.h"
00011 #ifdef PLATFORM_APERIOS
00012 # include "OPENR/OPENRAPI.h"
00013 #endif
00014
// Registers PitchDetector with the behavior menu under the path
// "Background Behaviors/System Daemons".
// NOTE(review): BEH_NONEXCLUSIVE|BEH_START presumably mean "may run alongside
// other behaviors" and "start automatically" -- confirm against the macro's definition.
00015 REGISTER_BEHAVIOR_MENU_OPT(PitchDetector,"Background Behaviors/System Daemons",BEH_NONEXCLUSIVE|BEH_START);
00016
00017 using namespace std;
00018
// Number of incoming sound frames collected into one FFT window
// (doEvent computes win_sz = frame_sz * fft_frames).
00019 const unsigned int PitchDetector::fft_frames = 4;
// Number of pitch bins tracked; sizes both pitch_info and pitch_bin in doStart().
00020 const unsigned int PitchDetector::num_pitches = 60;
// Frequency assigned to pitch index 0; index i gets base_pitch * half_step^i (see doStart()).
00021 const float PitchDetector::base_pitch = 110.0f;
// Frequency ratio between consecutive pitch indices; numerically 2^(1/12).
00022 const float PitchDetector::half_step = 1.0594630943593f;
// Numerically sqrt(2*pi). Not referenced in this file's visible code;
// presumably used by gaussian_pdf() -- confirm in the header.
00023 const float PitchDetector::sqrt_2_pi = 2.506628274631f;
00024
00025 PitchDetector::~PitchDetector() {
00026 ASSERT(pitch_info==NULL,"pitch_info wasn't deleted before destructor");
00027 ASSERT(pitch_bin==NULL,"pitch_bin wasn't deleted before destructor");
00028 }
00029
00030 void PitchDetector::doStart() {
00031 EventGeneratorBase::doStart();
00032
00033 ASSERT(pitch_info==NULL,"pitch_info was already allocated?");
00034 ASSERT(pitch_bin==NULL,"pitch_bin was already allocated?");
00035
00036 pitch_info = new PitchInfo[num_pitches];
00037 for (unsigned int i = 0; i != num_pitches; ++i) {
00038 float freq = base_pitch * powf(half_step, i);
00039 pitch_info[i].freq = freq;
00040 pitch_info[i].sigma = sqrtf((freq * half_step - freq) / 0.5f);
00041 pitch_info[i].duration = 0;
00042 }
00043
00044
00045 pitch_bin = new float[num_pitches];
00046
00047 cur_frame = 0;
00048 have_fft = false;
00049
00050
00051
00052 }
00053
00054 void PitchDetector::doStop() {
00055
00056 if(pitch_info!=NULL) {
00057 delete [] pitch_info;
00058 pitch_info=NULL;
00059 }
00060 if(pitch_bin!=NULL) {
00061 delete [] pitch_bin;
00062 pitch_bin=NULL;
00063 }
00064 EventGeneratorBase::doStop();
00065 }
00066
00067 void PitchDetector::doEvent() {
00068 if( event->getGeneratorID() != EventBase::micOSndEGID)
00069 return;
00070
00071
00072
00073 unsigned int i, j;
00074 const DataEvent<const OSoundVectorData*> *de = reinterpret_cast<const DataEvent<const OSoundVectorData*>*>( &event);
00075
00076 OSoundVectorData *svd = const_cast<OSoundVectorData*>(de->getData());
00077 const short *d = ( const short *)svd->GetData(0);
00078
00079 if ( ! frame_sz ) {
00080
00081 frame_sz = svd->GetInfo(0)->frameSize;
00082 rate = svd->GetInfo(0)->samplingRate;
00083
00084 win_sz = frame_sz * fft_frames;
00085
00086 left.ReSize(win_sz);
00087 right.ReSize(win_sz);
00088 iml.ReSize(win_sz / 2 + 1);
00089 imr.ReSize(win_sz / 2 + 1);
00090 rel.ReSize(win_sz / 2 + 1);
00091 rer.ReSize(win_sz / 2 + 1);
00092 pol.ReSize(win_sz / 2 + 1);
00093 por.ReSize(win_sz / 2 + 1);
00094 po.ReSize(win_sz / 2 + 1);
00095 }
00096
00097
00098 for (i = 0; i != frame_sz; ++i) {
00099 left ((cur_frame * frame_sz) + i + 1) = d[(i<<1) ];
00100 right((cur_frame * frame_sz) + i + 1) = d[(i<<1)+1];
00101 }
00102
00103 if (++cur_frame == fft_frames) {
00104 cur_frame = 0;
00105
00106 hamming(left);
00107 hamming(right);
00108
00109
00110 NEWMAT::RealFFT(left, rel, iml);
00111 NEWMAT::RealFFT(right, rer, imr);
00112 for (i = 1; i <= win_sz / 2 + 1; ++i) {
00113 NEWMAT::Real a, b;
00114 a = rel(i);
00115 b = iml(i);
00116 pol(i) = sqrtf(a*a + b*b);
00117 a = rer(i);
00118 b = imr(i);
00119 por(i) = sqrtf(a*a + b*b);
00120
00121
00122 po(i) = (pol(i) + por(i)) / 2;
00123
00124
00125 }
00126 have_fft = true;
00127
00128 } else if (cur_frame == 1 && have_fft) {
00129 float mean = 0.0f;
00130 unsigned int max = 0;
00131
00132
00133 local_maxes = 0;
00134 for (i = 0; i != num_pitches; ++i) {
00135 float sigma = pitch_info[i].sigma;
00136 float freq = pitch_info[i].freq;
00137 float bin = 0.0f;
00138
00139 for (j = 1; j <= win_sz / 2 + 1; ++j)
00140 bin += po(j) * gaussian_pdf(j * rate * 1.f / win_sz, sigma, freq);
00141 mean += (pitch_bin[i] = bin);
00142
00143
00144
00145 max = (bin > pitch_bin[max]) ? i : max;
00146
00147
00148 pitch_info[i].local_max = pitch_info[i].global_max = 0.0f;
00149 if (i == 1) {
00150 float prev = pitch_bin[i-1];
00151 if (bin < prev) {
00152 pitch_info[0].local_max = 1.0f - (bin / prev);
00153 ++local_maxes;
00154 }
00155 } else if (i > 1) {
00156 float a = pitch_bin[i-2], b = pitch_bin[i-1], c = pitch_bin[i];
00157 if (b > a && b > c) {
00158 pitch_info[i-1].local_max = 1.0f - (a + c) / (2.f * b);
00159 ++local_maxes;
00160 }
00161 }
00162 if (i == num_pitches - 1) {
00163 float prev = pitch_bin[i - 1];
00164 if (bin > prev) {
00165 pitch_info[i].local_max = 1.0f - (prev / bin);
00166 ++local_maxes;
00167 }
00168 }
00169 }
00170 mean /= num_pitches;
00171
00172 pitch_info[max].global_max = 1.0f - mean / pitch_bin[max];
00173
00174
00175
00176 for (i = 0; i < num_pitches; ++i) {
00177 float c, f = 1.0f;
00178 if (i % 4 && is_pitch(confidence(i/4, pitch_bin[i/4])))
00179 f /= 2.0f;
00180 if (i % 3 && is_pitch(confidence(i/3, pitch_bin[i/3])))
00181 f /= 2.0f;
00182 if (i % 3 && is_pitch(confidence(i*2/3, pitch_bin[i*2/3])))
00183 f /= 2.0f;
00184 if (i % 2 && is_pitch(confidence(i/2, pitch_bin[i/2])))
00185 f /= 2.0f;
00186 pitch_info[i].overtone = 1.0f - f;
00187 pitch_info[i].confidence = (c = confidence(i, pitch_bin[i]));
00188
00189 if (is_pitch(c)) {
00190
00191 EventBase::EventTypeID_t type = ( ! pitch_info[i].duration ) ? EventBase::activateETID : EventBase::statusETID;
00192 pitch_info[i].amplitude = (pitch_info[i].amplitude*pitch_info[i].duration + pitch_bin[i]) / (pitch_info[i].duration + 1);
00193 ++pitch_info[i].duration;
00194 erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), type, pitch_info[i].freq, pitch_name(i), pitch_bin[i], pitch_info[i].duration*win_sz*1000/rate, c));
00195 } else {
00196 if (pitch_info[i].duration) {
00197
00198 erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), EventBase::deactivateETID,pitch_info[i].freq, pitch_name(i),pitch_info[i].amplitude,pitch_info[i].duration*win_sz*1000/rate,c));
00199 pitch_info[i].duration = 0;
00200 pitch_info[i].amplitude = 0;
00201 }
00202 }
00203
00204 }
00205 }
00206
00207 }
00208
00209 bool PitchDetector::is_pitch(float conf) {
00210 return (conf >= config->sound.pitchConfidenceThreshold);
00211 }
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221