audacia/src/VoiceKey.h

105 lines
3.5 KiB
C++

/***************************************************************************
Audacity: A Digital Audio Editor
VoiceKey.h: a class implementing a voice key
(c) 2002-2005 Shane T. Mueller
Distributed under the terms of the GPL Version 2 or later.
***************************************************************************/
#ifndef __AUDACITY_VOICEKEY__
#define __AUDACITY_VOICEKEY__
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#include "audacity/Types.h"
class WaveTrack;
// Identifiers for the statistics a voice key can test.
// VKT_NONE is zero; every other enumerator is a distinct power of two,
// so each one occupies its own bit.
enum VoiceKeyTypes
{
   VKT_NONE                   = 0,
   VKT_ENERGY                 = 1 << 0,
   VKT_SIGN_CHANGES_LOW       = 1 << 1,
   VKT_SIGN_CHANGES_HIGH      = 1 << 2,
   VKT_DIRECTION_CHANGES_LOW  = 1 << 3,
   VKT_DIRECTION_CHANGES_HIGH = 1 << 4
};
// VoiceKey: interface of the voice key described in the file header.
// Only declarations are visible in this header; remarks marked NOTE(review)
// are inferred from names and must be confirmed against the implementation.
class VoiceKey {
public:
VoiceKey();
~VoiceKey();
// The four On*/Off* searches share one signature: the track to examine, a
// starting sample position and a length in samples; each returns a sampleCount.
// NOTE(review): presumably the returned position is where the signal turns
// on (On*) or off (Off*), scanning forward or backward -- confirm in the .cpp.
sampleCount OnForward (const WaveTrack & t, sampleCount start, sampleCount len);
sampleCount OnBackward (const WaveTrack & t, sampleCount start, sampleCount len);
sampleCount OffForward (const WaveTrack & t, sampleCount start, sampleCount len);
sampleCount OffBackward (const WaveTrack & t, sampleCount start, sampleCount len);
// NOTE(review): presumably estimates the *Mean / *SD members below from the
// given region of the track -- confirm in the .cpp.
void CalibrateNoise(const WaveTrack & t, sampleCount start, sampleCount len);
// NOTE(review): presumably updates mThresholdAdjustment (and the thresholds
// derived from it) -- confirm in the .cpp.
void AdjustThreshold(double t);
// Reports a yes/no result for the given region of the track.
// NOTE(review): presumably true when the enabled statistics exceed their
// thresholds -- confirm in the .cpp.
bool AboveThreshold(const WaveTrack & t, sampleCount start,sampleCount len);
// Takes one flag per statistic; the parameter names parallel the five
// mUse* members (energy, sign changes low/high, direction changes low/high).
void SetKeyType(bool erg, bool scLow, bool scHigh,
bool dcLow, bool dcHigh);
private:
double mWindowSize; //Size of analysis window, in milliseconds
double mThresholdAdjustment; //User-accessible sensitivity calibration variable
// Mean and standard deviation (SD) of each statistic.
// NOTE(review): presumably these are the values measured by CalibrateNoise.
double mEnergyMean;
double mEnergySD;
double mSignChangesMean;
double mSignChangesSD;
double mDirectionChangesMean;
double mDirectionChangesSD;
double mThresholdEnergy; // Threshold[*] is equal to [*]Mean + [*]SD * ThresholdAdjustment
double mThresholdSignChangesLower;
double mThresholdSignChangesUpper;
double mThresholdDirectionChangesLower;
double mThresholdDirectionChangesUpper;
//These determine which statistics should be used.
bool mUseEnergy;
bool mUseSignChangesLow;
bool mUseSignChangesHigh;
bool mUseDirectionChangesLow;
bool mUseDirectionChangesHigh;
double mSilentWindowSize; //Time in milliseconds of below-threshold windows required for silence
double mSignalWindowSize; //Time in milliseconds of above-threshold windows required for speech
// Each Test* helper takes a track region (start, len) and returns a double.
// NOTE(review): presumably the value of the named statistic over that region.
double TestEnergy (const WaveTrack & t, sampleCount start,sampleCount len);
double TestSignChanges (
const WaveTrack & t, sampleCount start, sampleCount len);
double TestDirectionChanges(
const WaveTrack & t, sampleCount start, sampleCount len);
// Each *Update helper receives the running statistic by non-const reference,
// so it can modify the caller's value, plus individual sample values.
// NOTE(review): these look like incremental updates for a window sliding by
// one sample (drop/add, a1/a2 at one end, z1/z2 at the other) -- confirm.
void TestEnergyUpdate (double & prevErg, int length, const float & drop, const float & add);
void TestSignChangesUpdate(double & currentsignchanges,int length, const float & a1,
const float & a2, const float & z1, const float & z2);
// atrend and ztrend are also passed by non-const reference, so they too can
// be modified by the call.
void TestDirectionChangesUpdate(double & currentdirectionchanges,int length,
int & atrend, const float & a1, const float & a2,
int & ztrend, const float & z1, const float & z2);
};
// Sign of an integer: -1 for negative input, +1 otherwise.
// Note that zero is treated as positive (the result is never 0).
inline int sgn(int number)
{
   if (number >= 0)
      return 1;
   return -1;
}
//This would return a logistic density based on a z-score.
// A logistic distribution with scale s has variance (pi*s)^2/3.
// (Disabled; kept for reference only.)
//inline float logistic(float z){ return fexp(-1 * z/(pi / sqrt(3)) / (1 + pow(fexp(-1 * z(pi / sqrt(3))),2)));}
#endif