00001 #include "PitchDetector.h"
00002 #include "Events/EventRouter.h"
00003 #include "Events/EventBase.h"
00004 #include "Events/DataEvent.h"
00005 #include "Events/PitchEvent.h"
00006 #include "Shared/newmat/newmatap.h"
00007 #include "Shared/Config.h"
00008 #include "Shared/debuget.h"
00009
00010 #include "Shared/ODataFormats.h"
00011 #ifdef PLATFORM_APERIOS
00012 # include "OPENR/OPENRAPI.h"
00013 #endif
00014
00015
00016 using namespace std;
00017
00018 const unsigned int PitchDetector::fft_frames = 4;
00019 const unsigned int PitchDetector::num_pitches = 60;
00020 const float PitchDetector::base_pitch = 110.0f;
00021 const float PitchDetector::half_step = 1.0594630943593f;
00022 const float PitchDetector::sqrt_2_pi = 2.506628274631f;
00023
00024 PitchDetector::~PitchDetector() {
00025 ASSERT(pitch_info==NULL,"pitch_info wasn't deleted before destructor");
00026 ASSERT(pitch_bin==NULL,"pitch_bin wasn't deleted before destructor");
00027 }
00028
00029 void PitchDetector::DoStart() {
00030 EventGeneratorBase::DoStart();
00031
00032 ASSERT(pitch_info==NULL,"pitch_info was already allocated?");
00033 ASSERT(pitch_bin==NULL,"pitch_bin was already allocated?");
00034
00035 pitch_info = new PitchInfo[num_pitches];
00036 for (unsigned int i = 0; i != num_pitches; ++i) {
00037 float freq = base_pitch * powf(half_step, i);
00038 pitch_info[i].freq = freq;
00039 pitch_info[i].sigma = sqrtf((freq * half_step - freq) / 0.5f);
00040 pitch_info[i].duration = 0;
00041 }
00042
00043
00044 pitch_bin = new float[num_pitches];
00045
00046 cur_frame = 0;
00047 have_fft = false;
00048
00049
00050
00051 }
00052
00053 void PitchDetector::DoStop() {
00054
00055 if(pitch_info!=NULL) {
00056 delete [] pitch_info;
00057 pitch_info=NULL;
00058 }
00059 if(pitch_bin!=NULL) {
00060 delete [] pitch_bin;
00061 pitch_bin=NULL;
00062 }
00063 EventGeneratorBase::DoStop();
00064 }
00065
00066 void PitchDetector::processEvent(const EventBase& event) {
00067 if( event.getGeneratorID() != EventBase::micOSndEGID) {
00068 EventGeneratorBase::processEvent(event);
00069 return;
00070 }
00071
00072
00073
00074 unsigned int i, j;
00075 const DataEvent<const OSoundVectorData*> *de = reinterpret_cast<const DataEvent<const OSoundVectorData*>*>( &event);
00076
00077 OSoundVectorData *svd = const_cast<OSoundVectorData*>(de->getData());
00078 const short *d = ( const short *)svd->GetData(0);
00079
00080 if ( ! frame_sz ) {
00081
00082 frame_sz = svd->GetInfo(0)->frameSize;
00083 rate = svd->GetInfo(0)->samplingRate;
00084
00085 win_sz = frame_sz * fft_frames;
00086
00087 left.ReSize(win_sz);
00088 right.ReSize(win_sz);
00089 iml.ReSize(win_sz / 2 + 1);
00090 imr.ReSize(win_sz / 2 + 1);
00091 rel.ReSize(win_sz / 2 + 1);
00092 rer.ReSize(win_sz / 2 + 1);
00093 pol.ReSize(win_sz / 2 + 1);
00094 por.ReSize(win_sz / 2 + 1);
00095 po.ReSize(win_sz / 2 + 1);
00096 }
00097
00098
00099 for (i = 0; i != frame_sz; ++i) {
00100 left ((cur_frame * frame_sz) + i + 1) = d[(i<<1) ];
00101 right((cur_frame * frame_sz) + i + 1) = d[(i<<1)+1];
00102 }
00103
00104 if (++cur_frame == fft_frames) {
00105 cur_frame = 0;
00106
00107 hamming(left);
00108 hamming(right);
00109
00110
00111 NEWMAT::RealFFT(left, rel, iml);
00112 NEWMAT::RealFFT(right, rer, imr);
00113 for (i = 1; i <= win_sz / 2 + 1; ++i) {
00114 NEWMAT::Real a, b;
00115 a = rel(i);
00116 b = iml(i);
00117 pol(i) = sqrtf(a*a + b*b);
00118 a = rer(i);
00119 b = imr(i);
00120 por(i) = sqrtf(a*a + b*b);
00121
00122
00123 po(i) = (pol(i) + por(i)) / 2;
00124
00125
00126 }
00127 have_fft = true;
00128
00129 } else if (cur_frame == 1 && have_fft) {
00130 float mean = 0.0f;
00131 unsigned int max = 0;
00132
00133
00134 local_maxes = 0;
00135 for (i = 0; i != num_pitches; ++i) {
00136 float sigma = pitch_info[i].sigma;
00137 float freq = pitch_info[i].freq;
00138 float bin = 0.0f;
00139
00140 for (j = 1; j <= win_sz / 2 + 1; ++j)
00141 bin += po(j) * gaussian_pdf(j * rate * 1.0 / win_sz, sigma, freq);
00142 mean += (pitch_bin[i] = bin);
00143
00144
00145
00146 max = (bin > pitch_bin[max]) ? i : max;
00147
00148
00149 pitch_info[i].local_max = pitch_info[i].global_max = 0.0f;
00150 if (i == 1) {
00151 float prev = pitch_bin[i-1];
00152 if (bin < prev) {
00153 pitch_info[0].local_max = 1.0f - (bin / prev);
00154 ++local_maxes;
00155 }
00156 } else if (i > 1) {
00157 float a = pitch_bin[i-2], b = pitch_bin[i-1], c = pitch_bin[i];
00158 if (b > a && b > c) {
00159 pitch_info[i-1].local_max = 1.0f - (a + c) / (2.f * b);
00160 ++local_maxes;
00161 }
00162 }
00163 if (i == num_pitches - 1) {
00164 float prev = pitch_bin[i - 1];
00165 if (bin > prev) {
00166 pitch_info[i].local_max = 1.0f - (prev / bin);
00167 ++local_maxes;
00168 }
00169 }
00170 }
00171 mean /= num_pitches;
00172
00173 pitch_info[max].global_max = 1.0f - mean / pitch_bin[max];
00174
00175
00176
00177 for (i = 0; i < num_pitches; ++i) {
00178 float c, f = 1.0f;
00179 if (i % 4 && is_pitch(confidence(i/4, pitch_bin[i/4])))
00180 f /= 2.0f;
00181 if (i % 3 && is_pitch(confidence(i/3, pitch_bin[i/3])))
00182 f /= 2.0f;
00183 if (i % 3 && is_pitch(confidence(i*2/3, pitch_bin[i*2/3])))
00184 f /= 2.0f;
00185 if (i % 2 && is_pitch(confidence(i/2, pitch_bin[i/2])))
00186 f /= 2.0f;
00187 pitch_info[i].overtone = 1.0f - f;
00188 pitch_info[i].confidence = (c = confidence(i, pitch_bin[i]));
00189
00190 if (is_pitch(c)) {
00191
00192 EventBase::EventTypeID_t type = ( ! pitch_info[i].duration ) ? EventBase::activateETID : EventBase::statusETID;
00193 pitch_info[i].amplitude = (pitch_info[i].amplitude*pitch_info[i].duration + pitch_bin[i]) / (pitch_info[i].duration + 1);
00194 ++pitch_info[i].duration;
00195 erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), type, pitch_info[i].freq, pitch_name(i), pitch_bin[i], pitch_info[i].duration*win_sz*1000/rate, c));
00196 } else {
00197 if (pitch_info[i].duration) {
00198
00199 erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), EventBase::deactivateETID,pitch_info[i].freq, pitch_name(i),pitch_info[i].amplitude,pitch_info[i].duration*win_sz*1000/rate,c));
00200 pitch_info[i].duration = 0;
00201 pitch_info[i].amplitude = 0;
00202 }
00203 }
00204
00205 }
00206 }
00207
00208 }
00209
00210 bool PitchDetector::is_pitch(float conf) {
00211 return (conf >= config->sound.pitchConfidenceThreshold);
00212 }
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228