1102 lines
45 KiB
C
1102 lines
45 KiB
C
/* cmupv.c -- phase vocoder */
|
|
|
|
/* Computation is driven by demands for output. The client calls either
|
|
pv_get_output() or pv_get_output2(). Either way, output is returned
|
|
one blocksize at a time. The blocksize is set by pv_set_blocksize()
|
|
and defaults to the synthesis hopsize, which defaults to fftsize/8.
|
|
|
|
Since the blocksize and hopsize are not necessarily matched in any
|
|
way, there is a buffer to accumulate the overlapping "grains" of sound
|
|
which we call the synthesis frames (each synthesis frame is computed
|
|
by adjusting phases of an analysis frame). The buffer is called
|
|
output_buffer, and the length (in floats) is output_buffer_len.
|
|
|
|
The output_buffer_len has to be big enough to contain blocksize samples
|
|
which are about to be returned as output plus fftsize samples which
|
|
overlap with future synthesis frames that will be added later. The
|
|
output_buffer_len also has to be hopsize + fftsize samples (probably
|
|
these worst-case sizes are conservative, but it's easier to set
|
|
workable upper bounds than to think about all the special cases when
|
|
output_buffer_len, blocksize, hopsize, and fftsize are all arbitrary).
|
|
|
|
The basic structure to produce blocksize samples is as follows:
|
|
out_next is the pointer to the next block of blocksize samples. This
|
|
is a pointer directly into output_buffer, initially the first sample
|
|
in output_buffer.
|
|
|
|
We also keep a pointer frame_next, which is where the next synthesis
|
|
frame will be added into output_buffer. Thus, the number of samples
|
|
that have been completely computed but not output (i.e. no more
|
|
overlapping frames will be added) is frame_next - out_next. When
|
|
(frame_next - out_next) > blocksize, we can deliver blocksize samples
|
|
to the caller.
|
|
|
|
The next constraint is that as we add overlapping synthesis frames into
|
|
output_buffer, we have to have room for them, so frame_next + fftsize
|
|
must not be greater than output_buffer + output_buffer_len. If this
|
|
constraint is not met, we need to shift everything toward the beginning
|
|
of the output_buffer. Since we've output everything up to out_next, we
|
|
shift by out_next - output_buffer.
|
|
|
|
Because output_buffer_len >= blocksize + fftsize, we are guaranteed that
|
|
there is always room to add in any synthesis frames that overlap the
|
|
blocksize samples to be returned next.
|
|
|
|
PHASE COHERENCE
|
|
|
|
To preserve phase relationships when a sine spills over multiple bins
|
|
(which happens always due to windowing), we adjust phase based on peaks
|
|
in the magnitude spectrum. The phases of neighboring bins are adjusted
|
|
by the same angle.
|
|
|
|
Specification: We want to divide the spectrum into peaks: between local
|
|
minima, adjust according to the peak.
|
|
|
|
Algorithm: Set "previous" minimum to -1 and previous magnitude to 0
|
|
- search for the "previous" peak
|
|
- iteratively do the following:
|
|
- search for the next minimum
|
|
- search for the next peak
|
|
- compute range of bins to modify, assign minimum to the largest peak
|
|
- compute the phase adjustment for the next peak
|
|
- apply the phase adjustment to range of bins
|
|
- set previous minumum to next minimum
|
|
- set previous peak to next peak
|
|
*/
|
|
|
|
/* BEGIN PV_SINE_TEST debugging code...
|
|
This debugging code is for testing a special case: the input is a sine tone
|
|
with amplitude about 1.0 and frequency about 689Hz at 44100Hz sample rate,
|
|
resulting in a period of exactly 64 samples, which will be bin 8 with an
|
|
fftsize of 512.
|
|
The goal here is to compute exactly what the phasevocoder should be doing
|
|
so we can compare to what it does. When SINETEST is enabled, data will be
|
|
printed. At the time this code is being written, the result with a stretch
|
|
factor of exactly 1.1 is a pulsing sound when the absolute interface is used.
|
|
With the absolute interface, the analysis hopsize is mostly 58, with a hop of
|
|
59 samples every 5 or 6 frames.
|
|
*/
|
|
// #define PV_SINE_TEST 1
|
|
#ifdef PV_SINE_TEST
|
|
double pvst_frequency = 44100.0 / 64.0; // about 689Hz
|
|
long pvst_offset = -1000; // -1 means we haven't seen first frame yet
|
|
// the first hop size is bogus, so we ignore it, then accumulate hop sizes here
|
|
#endif
|
|
/* END PV_SINE_TEST */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include "internal.h"
|
|
// only needed for some debugging code which is probably commented out
|
|
#include "cmupvdbg.h"
|
|
|
|
// define PHASE_FIX to tie neighboring bin phase to phase of spectral peaks
|
|
// #define PHASE_FIX 1
|
|
|
|
// debugging output on or off
|
|
// #define D if (1)
|
|
#define D if (0)
|
|
|
|
// more debugging
|
|
#define DD if (0)
|
|
|
|
#define LOGFFTSIZE_DEFAULT 11
|
|
#define RATIO_DEFAULT 1
|
|
#define BOOL int
|
|
#define FALSE 0
|
|
#define TRUE 1
|
|
#define TWOPI (2.0 * M_PI)
|
|
|
|
// These macros are used for memory allocation and freeing. Note that
|
|
// only ZERO acts like a function. The rest only work on fields of pv.
|
|
#define PVFREE(field) if (pv->field) { pv->free(pv->field); pv->field = NULL; }
|
|
#define PVALLOC(field, size) \
|
|
pv->field = (float *)pv->malloc((size) * sizeof(*(pv->field)))
|
|
#define PVREALLOC(field, size) { PVFREE(field); PVALLOC(field, size); }
|
|
#define ZERO(array, size) memset((array), 0, (size) * sizeof(*(array)))
|
|
|
|
|
|
#include "cmupv.h"
|
|
#include "fftext.h"
|
|
|
|
typedef enum {
|
|
PV_UNINITIALIZED,
|
|
PV_START,
|
|
PV_GOT_COUNT,
|
|
PV_GOT_INPUT } pv_phase_type;
|
|
|
|
struct position // each element in the structure array
|
|
{
|
|
long ana_pos; // the sample number of the center of analysis frame
|
|
long syn_pos; // the sample number of the center of synthesis frame
|
|
};
|
|
|
|
typedef struct {
|
|
void *(*malloc)(size_t); // malloc is used to allocate memory. If you
|
|
// have a real-time system and want to avoid the standard library
|
|
// malloc() which may have priority inversion problems due to locks,
|
|
// you can supply your own lock-free implementation
|
|
void (*free)(void *); // if you provide a custom malloc, you should
|
|
// provide a matching custom free()
|
|
int blocksize; // the size of audio blocks produced by the phase vocoder.
|
|
int fftsize; // the number of samples in each FFT. Should be power of 2.
|
|
int log2_fft; // 2 is the log base
|
|
int syn_hopsize; // the hopsize used in reconstructing the output
|
|
float ratio; // the time-stretch ratio. NOTE: even though ratio is
|
|
// specified as the amount input is stretched, THIS ratio is
|
|
// the reciprocal, i.e. input duration / output duration
|
|
int max_ana_hopsize; // fftsize / 3 is max hopsize
|
|
float pre_ratio; // previous ratio is used to calculate
|
|
// the previous input_length
|
|
int mode; // 0 - normal, 1 - phase fix, 2 - robovoice
|
|
float *ana_win; // the window function used on input (analysis)
|
|
float *syn_win; // the window function used on output (synthesis)
|
|
long input_eff_pos; // input effective position is the sample number
|
|
// of the input that corresponds to the current output
|
|
float *input_buffer; // used to buffer input samples
|
|
long input_buffer_len; // how many floats can input_buffer hold?
|
|
float *output_buffer; // used to buffer output samples
|
|
long output_buffer_len; // how big in floats is the output buffer?
|
|
float *input_head; // pointer to the start of the previous
|
|
// analysis frame. input_head is updated by
|
|
// hopsize before reading each frame.
|
|
float *input_rear; // pointer to the end of the input data
|
|
int frames_to_compute; // how many frames we'll compute in pv_get_output()
|
|
int expected_input; // the value computed by pv_get_input_count()
|
|
// we check that pv_put_input delivers what we asked for
|
|
float *out_next; // pointer into the output buffer from where the
|
|
// next output sample will delivered
|
|
float *frame_next; // pointer into the output buffer to where the
|
|
// next frame will by added
|
|
Pv_callback callback; // function to retrieve an input frame
|
|
void *rock; // object pointer or context info to be passed to callback
|
|
pv_phase_type phase;
|
|
BOOL first_time; // true only on the first fft output frame
|
|
BOOL absolute; // true if using the callback protocol -- set by create2()
|
|
float *ana_frame; // analysis frame
|
|
float *syn_frame; // synthesis frame
|
|
float *mag; // magnitude for points in the frame being processed
|
|
float *ana_phase; // phase for points in the analysis frame
|
|
// being processed
|
|
float *syn_phase; // phase for points in the synthesis frame
|
|
// being processed
|
|
float *pre_ana_phase; // recording last analysis phase for estimating
|
|
// the frequency
|
|
float *pre_syn_phase; // recording last systhesis phase for rebuilting
|
|
// the new phase
|
|
float *bin_freq; // bin frequency, used in phase unwrapping;
|
|
|
|
struct position *pos_buffer; // Circular array storing the sample
|
|
// number of the middle of the frames
|
|
// (both for analysis and synthesis frames)
|
|
struct position *pos_buffer_head; // beginning of the circular array,
|
|
// points to oldest entry. If equal to pos_buffer_rear, the queue
|
|
// is empty. Never points to
|
|
// pos_buffer + queue_length. It wraps around to pos_buffer.
|
|
struct position *pos_buffer_rear; // rear of the circular array,
|
|
// points to slot AFTER the most recent entry. If equal to
|
|
// pos_buffer_rear, the queue is empty. Never points to
|
|
// pos_buffer + queue_length. It wraps around to pos_buffer.
|
|
long queue_length; // length of the circular queue of corresponding times
|
|
long input_total; // how many input samples did we get so far?
|
|
// initially 0, and incremented upon pv_put_input()
|
|
// so the input_total count corresponds to the input_rear pointer.
|
|
long output_total; // how many output samples did we produce so far?
|
|
// initially 0, and incremented by block_size after each call to
|
|
// pv_get_output(). Corresponds to out_next.
|
|
} PV;
|
|
|
|
//extern long int sig;
|
|
//extern long int sig1;
|
|
//extern long int sig2;
|
|
|
|
// round_log_power - round fftsize up to a power of 2
|
|
// return log2(rounded up fftsize)
|
|
// optionally set size_ptr to rounded up fftsize
|
|
//
|
|
int round_log_power(int fftsize, int *size_ptr)
|
|
{
|
|
long double log2_fft = log2l(fftsize);
|
|
int round_log2_fft = (int) log2_fft;
|
|
if (round_log2_fft < log2_fft) {
|
|
round_log2_fft += 1;
|
|
}
|
|
if (log2_fft > 20 || ((1 << round_log2_fft) != fftsize)) {
|
|
round_log2_fft = 1024; // on error, substitute a sane value
|
|
}
|
|
if (size_ptr) *size_ptr = 1 << round_log2_fft;
|
|
return round_log2_fft;
|
|
}
|
|
|
|
|
|
Phase_vocoder pv_create(void *(*mallocfn)(size_t), void (*freefn)(void *))
|
|
{
|
|
if (!mallocfn) mallocfn = &malloc;
|
|
PV *pv = (PV *)mallocfn(sizeof(PV));
|
|
ZERO(pv, 1);
|
|
pv->phase = PV_UNINITIALIZED;
|
|
pv->malloc = mallocfn;
|
|
pv->free = freefn;
|
|
pv_set_fftsize(pv, 1 << LOGFFTSIZE_DEFAULT);
|
|
// syn_hopsize will now be FFTSIZE_DEFAULT / 8
|
|
// pv_set_syn_hopsize(pv, FFTSIZE_DEFAULT / 8);
|
|
pv->blocksize = pv->syn_hopsize;
|
|
pv_set_ratio(pv, RATIO_DEFAULT);
|
|
pv->first_time = TRUE;
|
|
pv->mode = 0;
|
|
return (Phase_vocoder)pv;
|
|
}
|
|
|
|
|
|
Phase_vocoder pv_create2(void *(*mallocfn)(size_t), void (*freefn)(void *),
|
|
Pv_callback callback, void *rock)
|
|
{
|
|
PV *pv = (PV *)pv_create(mallocfn, freefn);
|
|
pv->absolute = TRUE;
|
|
pv_set_callback(pv, callback, rock);
|
|
return (Phase_vocoder)pv;
|
|
}
|
|
|
|
|
|
void pv_end(Phase_vocoder *x)
|
|
{
|
|
PV *pv = (PV *)(*x);
|
|
fftFree();
|
|
PVFREE(ana_win);
|
|
PVFREE(syn_win);
|
|
PVFREE(input_buffer);
|
|
PVFREE(output_buffer);
|
|
PVFREE(ana_frame);
|
|
PVFREE(syn_frame);
|
|
PVFREE(mag);
|
|
PVFREE(ana_phase);
|
|
PVFREE(syn_phase);
|
|
PVFREE(pre_ana_phase);
|
|
PVFREE(pre_syn_phase);
|
|
PVFREE(bin_freq);
|
|
PVFREE(pos_buffer);
|
|
pv->free(pv);
|
|
*x = NULL;
|
|
}
|
|
|
|
void pv_set_callback(Phase_vocoder x, Pv_callback callback, void *rock)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
pv->callback = callback;
|
|
pv->rock = rock;
|
|
}
|
|
|
|
void pv_set_blocksize(Phase_vocoder x, int n)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
pv->blocksize = n;
|
|
pv->phase = PV_UNINITIALIZED;
|
|
}
|
|
|
|
void pv_set_fftsize(Phase_vocoder x, int n)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
// n must be power of 2 and for sanity, we'll require it to be at least 16
|
|
// power of 2 test: only if n is a power of 2 will n-1 clear the high order
|
|
// bit:
|
|
if ((n & (n - 1)) || (n < 16)) {
|
|
return; // ignore bad argument
|
|
}
|
|
// preserve the same syn_hopsize ratio, e.g. if syn_hopsize
|
|
// is fftsize/8, then after setting fftsize, new syn_hopsize
|
|
// will be (new fftsize)/8
|
|
int hop = (pv->syn_hopsize == 0 ? 8 :
|
|
pv->fftsize / pv->syn_hopsize); // the divisor
|
|
pv->fftsize = n;
|
|
pv->log2_fft = round_log_power(n, &(pv->fftsize));
|
|
pv_set_syn_hopsize(x, n / hop);
|
|
pv->phase = PV_UNINITIALIZED;
|
|
pv->max_ana_hopsize = n / 3;
|
|
}
|
|
|
|
void pv_set_ratio(Phase_vocoder x, float ratio)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
assert(pv->phase == PV_START || pv->phase == PV_UNINITIALIZED);
|
|
pv->pre_ratio = pv->ratio;
|
|
pv->ratio = 1.0F / ratio;
|
|
}
|
|
|
|
void pv_set_syn_hopsize(Phase_vocoder x, int n)
|
|
// set the hopsize. Must be fftsize divided by a power of 2.
|
|
// non-power-of-two n will be rounded up.
|
|
// hopsize must be at least 1 and at most fftsize/4.
|
|
// out-of-bound n will be put within bounds
|
|
{
|
|
PV *pv = (PV *)x;
|
|
if (n < 1) n = 1;
|
|
round_log_power(n, &(pv->syn_hopsize));
|
|
if (pv->syn_hopsize > pv->fftsize / 4) {
|
|
pv->syn_hopsize = pv->fftsize / 4;
|
|
}
|
|
pv->phase = PV_UNINITIALIZED;
|
|
}
|
|
|
|
void pv_initialize(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
|
|
// allocate space and initialize for window
|
|
if (! pv->ana_win)
|
|
pv->ana_win = pv_window(pv, hann); //default analysis window is Hanning
|
|
if (! pv->syn_win)
|
|
pv->syn_win = pv_window(pv, hann); //default synthesis window is Hanning
|
|
|
|
// allocate space and initialize for input buffer and output buffer
|
|
if (pv->blocksize <= pv->syn_hopsize) {
|
|
pv->input_buffer_len = pv->fftsize;
|
|
} else {
|
|
// The maximum of ana_hopsize is fftsize/3, so the
|
|
// pv->input_buffer_len is set to the maximum so as to avoid
|
|
// freeing and allocating memory for input buffer many times
|
|
// due to the changing of time-stretching ratio.
|
|
// The maximum amount needed is fftsize to produce one frame of
|
|
// output plus: fftsize/3 for each additional frame. The total
|
|
// number of frames we must compute to form blocksize samples
|
|
// of output is blocksize / hopsize.
|
|
pv->input_buffer_len = pv->fftsize + 2 /* to avoid rounding error */ +
|
|
lroundf((((float) pv->blocksize / pv->syn_hopsize) - 1) * (pv->fftsize / 3.0F));
|
|
}
|
|
if (! pv->absolute) {
|
|
PVREALLOC(input_buffer, pv->input_buffer_len);
|
|
// preload with fftsize/2 zeros so that the first sample of the input
|
|
// will be at the center of the fft window
|
|
pv->input_head = pv->input_buffer;
|
|
ZERO(pv->input_buffer, pv->fftsize / 2);
|
|
pv->input_rear = pv->input_buffer + pv->fftsize / 2;
|
|
}
|
|
|
|
// how long does the output buffer need to be?
|
|
// It has to be long enough to add in an entire fft frame, so
|
|
// at least fftsize. It should then be at least syn_hopsize - 2
|
|
// bigger, so that if we're outputting syn_hopsize - 1 samples
|
|
// (a really bad choice, by the way), we could have syn_hopsize - 2
|
|
// samples in the output buffer and the next buffer gets added
|
|
// starting at location syn_hopsize - 2, so we need syn_hopsize - 2 +
|
|
// fftsize. Let's make it an even syn_hopsize + fftsize.
|
|
// It also has to be long enough to hold an output buffer length +
|
|
// fftsize.
|
|
// So overall, we need max(syn_hopsize, blocksize) + fftsize.
|
|
//
|
|
pv->output_buffer_len = pv->blocksize;
|
|
if (pv->blocksize <= pv->syn_hopsize) {
|
|
pv->output_buffer_len = pv->syn_hopsize;
|
|
}
|
|
pv->output_buffer_len += pv->fftsize;
|
|
PVREALLOC(output_buffer, pv->output_buffer_len);
|
|
D printf("pv_initialize: input_buffer_len %ld\n", pv->input_buffer_len);
|
|
D printf(" output_buffer_len %ld\n", pv->output_buffer_len);
|
|
D printf(" blocksize %d\n", pv->blocksize);
|
|
D printf(" fftsize %d\n", pv->fftsize);
|
|
D printf(" syn_hopsize %d\n", pv->syn_hopsize);
|
|
pv->out_next = pv->output_buffer;
|
|
pv->frame_next = pv->output_buffer;
|
|
ZERO(pv->output_buffer, pv->output_buffer_len);
|
|
PVREALLOC(ana_frame, pv->fftsize);
|
|
PVREALLOC(syn_frame, pv->fftsize);
|
|
PVREALLOC(mag, pv->fftsize / 2 + 1);
|
|
// allocate space for phase and pre_phase which will be used in
|
|
// phase unwrapping
|
|
PVREALLOC(ana_phase, pv->fftsize / 2 + 1);
|
|
PVREALLOC(syn_phase, pv->fftsize / 2 + 1);
|
|
PVREALLOC(pre_ana_phase, pv->fftsize / 2 + 1);
|
|
PVREALLOC(pre_syn_phase, pv->fftsize / 2 + 1);
|
|
// bin frequency, used in phase unwrapping
|
|
PVREALLOC(bin_freq, pv->fftsize / 2 + 1);
|
|
int i;
|
|
for (i = 0; i <= pv->fftsize / 2; i++)
|
|
pv->bin_freq[i] = (float) (TWOPI * i / pv->fftsize);
|
|
// get_effective_pos() maps from the beginning of the
|
|
// next block to the corresponding input sample. Since
|
|
// the output hopsize is fixed, the next output sample
|
|
// was at the center of the frame if we go back by
|
|
// framesize / syn_hopsize frames. Thus we need
|
|
// framesize / syn_hopsize entries in the queue. We'll
|
|
// pad by a couple to deal with rounding issues:
|
|
pv->queue_length = pv->fftsize / (pv->syn_hopsize * 2) + 2;
|
|
if (!pv->absolute) {
|
|
PVFREE(pos_buffer);
|
|
pv->pos_buffer = (struct position *)
|
|
(pv->malloc((pv->queue_length + 1) * sizeof(struct position)));
|
|
|
|
pv->pos_buffer_head = pv->pos_buffer;
|
|
pv->pos_buffer_rear = pv->pos_buffer;
|
|
}
|
|
// make sure tables are constructed before we start real-time processing
|
|
#ifndef NDEBUG
|
|
int fft_error_sign =
|
|
#endif
|
|
fftInit(pv->log2_fft); // target fftInit
|
|
assert(!fft_error_sign);
|
|
|
|
pv->phase = PV_START;
|
|
}
|
|
|
|
|
|
void pv_set_ana_window(Phase_vocoder x, float *window)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
PVREALLOC(ana_win, pv->fftsize);
|
|
memcpy(pv->ana_win, window, pv->fftsize * sizeof(float));
|
|
}
|
|
|
|
|
|
void pv_set_syn_window(Phase_vocoder x, float *window)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
PVREALLOC(syn_win, pv->fftsize);
|
|
memcpy(pv->syn_win, window, pv->fftsize * sizeof(float));
|
|
}
|
|
|
|
|
|
void pv_set_mode(Phase_vocoder x, int mode)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
if (mode >= 0 && mode <= 2) {
|
|
pv->mode = mode;
|
|
}
|
|
}
|
|
|
|
|
|
float *pv_window(Phase_vocoder x, float (*window_type)(double x))
|
|
// window is after normalized
|
|
{
|
|
PV *pv = (PV *)x;
|
|
float sum_window_square = 0, COLA_factor;
|
|
int window_length = pv->fftsize;
|
|
float *window = (float *)pv->malloc(window_length * sizeof(float));
|
|
int i;
|
|
for (i = 0; i < window_length; i++) {
|
|
window[i] = window_type((double)i / window_length);
|
|
// note that the computation is all double even if window[i] is float
|
|
sum_window_square += window[i] * window[i];
|
|
}
|
|
COLA_factor = sum_window_square / pv->syn_hopsize;
|
|
for (i = 0; i <= pv->fftsize - 1; i++)
|
|
window[i] = (float) (window[i] / sqrt(COLA_factor));
|
|
return window;
|
|
}
|
|
|
|
|
|
int pv_get_input_count(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
int ana_hopsize = lroundf((pv->syn_hopsize) * (pv->ratio));
|
|
if (ana_hopsize > pv->max_ana_hopsize) {
|
|
ana_hopsize = pv->max_ana_hopsize;
|
|
}
|
|
|
|
assert(pv->phase == PV_START);
|
|
|
|
// To produce blocksize, how many samples do we need? The next
|
|
// sample to output is at out_next, and the next frame will be
|
|
// added at frame_next, so we've already computed
|
|
// out_next - frame_next:
|
|
int need = pv->blocksize - (int)(pv->frame_next - pv->out_next);
|
|
// need is now the number of output samples we need
|
|
// How many fft frames will be required? Round up by adding hopsize-1:
|
|
int frames = (need + pv->syn_hopsize - 1) / pv->syn_hopsize;
|
|
if (frames > 0) {
|
|
// Skip hopsize frames except on the first time, where
|
|
// we always put first sample in the middle of the frame:
|
|
if (!pv->first_time) {
|
|
pv->input_head += ana_hopsize;
|
|
}
|
|
// We need framesize + hopsize * (frames - 1) in order to compute
|
|
// frames overlapping fft frames of size framesize.
|
|
need = pv->fftsize + ana_hopsize * (frames - 1);
|
|
// Now how many input samples do we have already?
|
|
long have = (long) (pv->input_rear - pv->input_head);
|
|
need -= have;
|
|
// See if we have room for need samples in the buffer:
|
|
if (pv->input_rear + need > pv->input_buffer + pv->input_buffer_len) {
|
|
// not enough room. Shift the input buffer to make space.
|
|
long shift = (long) (pv->input_head - pv->input_buffer);
|
|
memmove(pv->input_buffer, pv->input_head,
|
|
(pv->input_rear - pv->input_head) *
|
|
sizeof(*(pv->input_buffer)));
|
|
pv->input_head -= shift;
|
|
pv->input_rear -= shift;
|
|
D printf(" after input shift by %ld, head at %ld\n",
|
|
shift, (long) (pv->input_head - pv->input_buffer));
|
|
}
|
|
// make sure our assumptions are true and we now have space:
|
|
assert(pv->input_rear + need <=
|
|
pv->input_buffer + pv->input_buffer_len);
|
|
// See if we have room in the output_buffer
|
|
// last sample will be at frame_next + (frames - 1) * hopsize + fftsize
|
|
float *last_output = pv->frame_next +
|
|
(frames - 1) * pv->syn_hopsize + pv->fftsize;
|
|
if (last_output > pv->output_buffer + pv->output_buffer_len) {
|
|
// not enough room. Shift the output buffer to make space.
|
|
long shift = (long) (pv->out_next - pv->output_buffer);
|
|
memmove(pv->output_buffer, pv->out_next,
|
|
(pv->fftsize - pv->syn_hopsize) *
|
|
sizeof(*(pv->output_buffer)));
|
|
pv->frame_next -= shift;
|
|
pv->out_next -= shift;
|
|
}
|
|
} else {
|
|
frames = 0;
|
|
need = 0;
|
|
}
|
|
pv->frames_to_compute = frames;
|
|
pv->phase = PV_GOT_COUNT;
|
|
pv->expected_input = need;
|
|
return need;
|
|
}
|
|
|
|
#pragma warning(disable: 4715 4068) // return type and unknown pragma
|
|
#pragma clang diagnostic ignored "-Wreturn-type"
|
|
double pv_get_effective_pos(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
assert(pv->phase == PV_START);
|
|
|
|
// Find the appropriate position struct for the computation of
|
|
// effective audio position. We are given pv->output_total, the
|
|
// sample count at which we want the equivalent input sample
|
|
// count. We want to interpolate between two queue entries, so
|
|
// we'll search the queue until we have an entry that is greater
|
|
// than output_total. Then we set head to the previous entry
|
|
// because it will make future searches go faster.
|
|
|
|
struct position *pos_find = pv->pos_buffer_head;
|
|
struct position *pos_find_prev = NULL;
|
|
while (pos_find != pv->pos_buffer_rear &&
|
|
pos_find->syn_pos <= pv->output_total) {
|
|
pos_find_prev = pos_find;
|
|
pos_find++;
|
|
if (pos_find == pv->pos_buffer + pv->queue_length) {
|
|
pos_find = pv->pos_buffer; // wrap
|
|
}
|
|
}
|
|
// if pos_find and pos_find_prev both point to something, we
|
|
// can interpolate:
|
|
if (pos_find != pv->pos_buffer_rear && pos_find_prev) {
|
|
// we can drop old positions from queue now:
|
|
pv->pos_buffer_head = pos_find_prev;
|
|
// interpolate
|
|
long output_step = pos_find->syn_pos - pos_find_prev->syn_pos;
|
|
long input_step = pos_find->ana_pos - pos_find_prev->ana_pos;
|
|
return pos_find_prev->ana_pos + input_step *
|
|
(double)(pv->output_total - pos_find_prev->syn_pos) /
|
|
(double)output_step;
|
|
// if there's nothing in the queue, then we must be starting.
|
|
// after the first frame there are TWO entries
|
|
} else if (pos_find_prev == NULL) {
|
|
// if any of these fail, maybe the queue_length is too small
|
|
// and we dropped some history too early
|
|
assert(pos_find == pv->pos_buffer_rear);
|
|
assert(pv->first_time);
|
|
assert(pv->output_total == 0);
|
|
return -(pv->ratio * pv->fftsize / 2.0);
|
|
} // I can't think of any other case.
|
|
assert(FALSE);
|
|
}
|
|
|
|
|
|
// Send samples to phase vocoder. size should match the number of
|
|
// samples computed by get_input_count.
|
|
//
|
|
void pv_put_input(Phase_vocoder x, int size, float *samples)
|
|
// 'samples' points to samples to be sent each time
|
|
{
|
|
PV *pv = (PV *)x;
|
|
assert(pv->phase == PV_GOT_COUNT);
|
|
// size must agree with the value computed by pv_get_input_count:
|
|
assert(pv->expected_input == size);
|
|
D printf("pv_put_input: size %d, %g at %ld\n",
|
|
size, *samples, (long) (pv->input_rear - pv->input_buffer));
|
|
if (size > 0) {
|
|
memcpy(pv->input_rear, samples, size * sizeof(*pv->input_rear));
|
|
pv->input_rear += size;
|
|
pv->input_total += size;
|
|
}
|
|
pv->phase = PV_GOT_INPUT;
|
|
}
|
|
|
|
|
|
void compute_one_frame(PV *pv, int ana_hopsize)
|
|
{
|
|
float *syn_frame = pv->syn_frame;
|
|
float *ana_frame = pv->ana_frame;
|
|
int fftsize = pv->fftsize;
|
|
int log2_fft = pv->log2_fft;
|
|
float *mag = pv->mag;
|
|
float *ana_phase = pv->ana_phase;
|
|
float *syn_phase = pv->syn_phase;
|
|
float *syn_win = pv->syn_win;
|
|
float *frame_next = pv->frame_next;
|
|
int syn_hopsize = pv->syn_hopsize;
|
|
float *pre_ana_phase = pv->pre_ana_phase;
|
|
float *pre_syn_phase = pv->pre_syn_phase;
|
|
float *bin_freq = pv->bin_freq;
|
|
int i;
|
|
//#define SKIP_PHASE_ADJUST
|
|
#ifdef SKIP_PHASE_ADJUST
|
|
// for debugging we just copy the windowed input to the output
|
|
// without adjusting phases.
|
|
memcpy(syn_frame, ana_frame, fftsize * sizeof(*syn_frame));
|
|
#else
|
|
/*DBG
|
|
long zeros = pv->output_total + frame_next - pv->out_next;
|
|
write_pv_frame(zeros, ana_frame, fftsize, "pvsyn");
|
|
DBG*/
|
|
OneDimensionFFTshift(ana_frame, fftsize); // FFTshift
|
|
fftInit(log2_fft);
|
|
rffts(ana_frame, log2_fft, 1);
|
|
|
|
/* get magnitude and phase */
|
|
mag[0] = ana_frame[0];
|
|
ana_phase[0] = 0;
|
|
mag[fftsize / 2] = ana_frame[1];
|
|
ana_phase[fftsize / 2] = 0;
|
|
for (i = 1; i < fftsize / 2; i++) {
|
|
float real = ana_frame[2 * i];
|
|
float imag = ana_frame[2 * i + 1];
|
|
mag[i] = (float)sqrt(real * real + imag * imag);
|
|
ana_phase[i] = (float)atan2(imag, real);
|
|
}
|
|
#ifdef PV_SINE_TEST
|
|
if (pvst_offset == -1000) pvst_offset = -24;
|
|
else pvst_offset += ana_hopsize;
|
|
// phase of the sine should be (offset / 64) * TWO_PI,
|
|
// but when sine phase is 0, atan2(0, -1) is -pi/2 so we have
|
|
// atan2_phase = sine_phase - pi/2
|
|
// adding 3 pi/2 instead of subtracting pi/2 to make fmod result positive
|
|
double est_atan2_phase = fmod((pvst_offset / 64.0) * TWOPI + (3 * M_PI_2), TWOPI);
|
|
if (est_atan2_phase > M_PI) est_atan2_phase -= TWOPI;
|
|
printf("offset %ld hop %ld ph[8] %5f est.ph %5f real %5f imag %5f\n",
|
|
pvst_offset, ana_hopsize, ana_phase[8], est_atan2_phase,
|
|
ana_frame[16], ana_frame[17]);
|
|
#endif
|
|
/* phase unwrapping & set synthesis phase */
|
|
if (pv->first_time) {
|
|
D printf("phasevocoder fftsize %d hopsize %d\n",
|
|
fftsize, syn_hopsize);
|
|
memcpy(syn_phase, ana_phase,
|
|
((fftsize / 2) + 1) * sizeof(*syn_phase));
|
|
} else if (pv->mode == PV_MODE_PHASEFIX) {
|
|
// we'll start each iteration with prev_min_x set to the lowest bin
|
|
// that will be assigned to the peak at prev_peak_x. We'll find the
|
|
// next_min_x and the following next_peak_x. Update the phases.
|
|
//
|
|
int prev_peak_x = 0; // index of previous peak
|
|
float prev_peak_mag; // magnitude of previous peak
|
|
int prev_min_x = 0; // index of previous local minimum
|
|
int next_peak_x; // index of peak between prev_min_x and next_min_x
|
|
float next_peak_mag; // magnitude of next peak
|
|
int next_min_x; // index of next minimum (after peak_x)
|
|
float next_min_mag; // magnitude at next_min_x
|
|
float last_mag = mag[0]; // used in search for peaks
|
|
float this_mag = mag[1];
|
|
float next_mag;
|
|
int i; // loop index
|
|
// decide if we're starting on a peak or a minimum:
|
|
if (last_mag <= this_mag) { // starting on a minimum
|
|
// find peak
|
|
for (i = 1; i < fftsize / 2; i++) {
|
|
next_mag = mag[i + 1]; // invariant: last_mag <= this_mag
|
|
if (this_mag > next_mag) { // found peak
|
|
prev_peak_x = i;
|
|
prev_peak_mag = this_mag;
|
|
break; // invariant: this_mag > next_mag
|
|
}
|
|
// invariant: this_mag <= next_mag
|
|
last_mag = this_mag;
|
|
this_mag = next_mag; // invariant: last_mag <= this_mag
|
|
}
|
|
if (i >= fftsize / 2) {
|
|
prev_peak_x = i;
|
|
}
|
|
} else { // set up to start loop
|
|
next_mag = this_mag; // invariant: last_mag > this_mag
|
|
this_mag = last_mag; // invariant: this_mag > next_mag
|
|
prev_peak_mag = last_mag;
|
|
}
|
|
while (prev_min_x <= fftsize / 2) {
|
|
// invariant: prev_min_x is previous local minimum or 0
|
|
// prev_peak_x is first local peak after prev_min_x (or 0)
|
|
// last_mag is mag at prev_peak_x - 1
|
|
// this_mag is mag at prev_peak_x
|
|
// next_mag is mag at prev_peak_x + 1
|
|
// find next minimum
|
|
// Note: prev_peak_x might be fftsize/2, so i might be fftsize/2 + 1
|
|
for (i = prev_peak_x + 1; i < fftsize / 2; i++) {
|
|
last_mag = this_mag; // invariant: this_mag > next_mag
|
|
this_mag = next_mag; // invariant: last_mag > this_mag
|
|
next_mag = mag[i + 1];
|
|
if (this_mag <= next_mag) { // found minimum
|
|
// here, last_mag at i-1, this_mag at i, next_mag at i+1
|
|
// and next_mix_x == i
|
|
break;
|
|
} // loop invariant: this_mag > next_mag
|
|
}
|
|
// invariant: this_mag <= next_mag || i == fftsize/2
|
|
if (i >= fftsize / 2) { // special case at end of spectrum
|
|
// no minimum found
|
|
next_min_x = fftsize / 2 + 1;
|
|
} else {
|
|
next_min_x = i; // either we found peak or we set i to fftsize/2
|
|
next_min_mag = mag[i];
|
|
} // invariant: this_mag <= next_mag || i == fftsize/2
|
|
// search for second peak;
|
|
for (i = next_min_x + 1; i < fftsize / 2; i++) {
|
|
last_mag = this_mag; // invariant: this_mag <= next_mag
|
|
this_mag = next_mag; // invariant: last_mag <= this_mag
|
|
next_mag = mag[i + 1];
|
|
if (this_mag > next_mag) { // found peak
|
|
// here, last_mag at i-1, this_mag at i, next_mag at i+1
|
|
// and next_peak_x == i
|
|
break;
|
|
} // loop invariant: this_mag <= next_mag
|
|
}
|
|
next_peak_x = i;
|
|
|
|
// special case if we're at the end:
|
|
if (i >= fftsize / 2) {
|
|
if (next_mag < this_mag) {
|
|
next_peak_x = fftsize / 2 + 1;
|
|
} else {
|
|
next_peak_x = fftsize / 2; // this may not be necessary
|
|
next_peak_mag = mag[next_peak_x];
|
|
}
|
|
} else {
|
|
next_peak_mag = mag[i];
|
|
}
|
|
// now we have prev_min, prev_peak, next_min, and next_peak. We
|
|
// want bins from prev_min to next_min to get the phase shift of
|
|
// prev_peak. First decide if next_min gets assigned to prev_peak
|
|
// or next_peak. Assign to the closest peak, but break ties by
|
|
// picking the largest peak.
|
|
if (next_min_x - prev_peak_x < next_peak_x - next_min_x) {
|
|
// closer to prev_peak, so include min with it
|
|
next_min_x++;
|
|
} else if ((next_min_x - prev_peak_x == next_peak_x - next_min_x) &&
|
|
// equidistant so see if prev_peak_mag > next_peak_mag
|
|
(prev_peak_mag > next_peak_mag)) {
|
|
next_min_x++;
|
|
}
|
|
// Now we want to adjust phases of prev_min_x, prev_min_x + 1,
|
|
// prev_min_x + 2, ..., next_min_x - 1.
|
|
//
|
|
// Assign the new phases according to the peak...
|
|
// increment between actual phase increment value
|
|
// and the phase increment value got when the
|
|
// it's the nearest bin frequency. Used
|
|
// in phase unwrapping:
|
|
int j = prev_peak_x; // just to make more concise notation
|
|
|
|
double phase_increment = ana_phase[j] - pre_ana_phase[j] -
|
|
bin_freq[j] * ana_hopsize;
|
|
// need to get phase_increment between -M_PI and +M_PI.
|
|
// Algorithm: add M_PI, get phase_increment between 0 and TWO_PI,
|
|
// then subtract M_PI:
|
|
phase_increment = fmod(phase_increment + M_PI, TWOPI);
|
|
if (phase_increment < 0)
|
|
phase_increment += TWOPI;
|
|
phase_increment -= M_PI;
|
|
// estimated frequency from phase unwrapping
|
|
/*DBG
|
|
if (j == 8)
|
|
printf("phase_increment %g hopsize %d\n", phase_increment, ana_hopsize);
|
|
DBG*/
|
|
float estimate_freq = (float) (phase_increment / ana_hopsize +
|
|
bin_freq[j]);
|
|
// get synthesis phase adjustment
|
|
phase_increment = pre_syn_phase[j] +
|
|
syn_hopsize * estimate_freq - ana_phase[j];
|
|
/*DBG
|
|
if (j == 8)
|
|
printf("phase_increment at %d is %g, freq %g ", j, phase_increment,
|
|
estimate_freq);
|
|
DBG*/
|
|
// update the range of bins:
|
|
for (i = prev_min_x; i < next_min_x; i++) {
|
|
syn_phase[i] = fmodf((float) (ana_phase[i] + phase_increment),
|
|
(float) TWOPI);
|
|
}
|
|
/*DBG
|
|
if (j == 8) printf("syn_phase[8] %g\n", syn_phase[8]);
|
|
DBG*/
|
|
// now get ready for the next iteration
|
|
prev_min_x = next_min_x;
|
|
prev_peak_x = next_peak_x;
|
|
prev_peak_mag = next_peak_mag;
|
|
|
|
}
|
|
} else if (pv->mode == PV_MODE_STANDARD) {
|
|
for (i = 0; i <= fftsize / 2; i++) {
|
|
// increment between actual phase increment value
|
|
// and the phase increment value got when the
|
|
// it's the nearest bin frequency. Used
|
|
// in phase unwrapping:
|
|
double phase_increment = ana_phase[i] - pre_ana_phase[i] -
|
|
bin_freq[i] * ana_hopsize;
|
|
// need to get phase_increment between -M_PI and +M_PI.
|
|
// Algorithm: add M_PI, get phase_increment between 0 and TWO_PI,
|
|
// then subtract M_PI:
|
|
phase_increment = fmod(phase_increment + M_PI, TWOPI);
|
|
if (phase_increment < 0)
|
|
phase_increment += TWOPI;
|
|
phase_increment -= M_PI;
|
|
|
|
// estimated frequency from phase unwrapping
|
|
float estimate_freq = (float) (phase_increment / ana_hopsize +
|
|
bin_freq[i]);
|
|
// set synthesis phase
|
|
syn_phase[i] = fmodf((float) (pre_syn_phase[i] +
|
|
syn_hopsize * estimate_freq),
|
|
(float) TWOPI);
|
|
}
|
|
} else if (pv->mode == PV_MODE_ROBOVOICE) {
|
|
; // syn_phase[] is unmodified, i.e. constant
|
|
} else {
|
|
assert(FALSE); // bad mode value
|
|
}
|
|
for (i = 0; i < fftsize / 2; i++) {
|
|
// record phases
|
|
pre_ana_phase[i] = ana_phase[i];
|
|
pre_syn_phase[i] = syn_phase[i];
|
|
|
|
// update realpart and imagpart
|
|
syn_frame[i * 2] = (float) (mag[i] * cos(syn_phase[i]));
|
|
syn_frame[i * 2 + 1] = (float) (mag[i] * sin(syn_phase[i]));
|
|
}
|
|
pre_ana_phase[i] = ana_phase[i];
|
|
pre_syn_phase[i] = syn_phase[i];
|
|
// update realpart and imagpart
|
|
syn_frame[1] = (float) (mag[i] * cos(syn_phase[i]));
|
|
// inverse FFT
|
|
riffts(syn_frame, log2_fft, 1);
|
|
|
|
// fftshift
|
|
OneDimensionFFTshift(syn_frame, fftsize);
|
|
#endif // SKIP_PHASE_ADJUST
|
|
D printf(" mid syn_frame->%g\n", syn_frame[pv->fftsize / 2]);
|
|
//D printf(" frame offset %ld\n", frame_next - pv->output_buffer);
|
|
// window the frame and then add it to the output buffer
|
|
// assume here that there is room to add in syn_frame
|
|
D printf(" add to frame_next: %ld\n",
|
|
(long) (pv->frame_next - pv->output_buffer));
|
|
/*DBG
|
|
float tmp_frame[4096];
|
|
for (int i = 0; i < fftsize; i++) {
|
|
tmp_frame[i] = syn_win[i] * syn_frame[i];
|
|
}
|
|
write_pv_frame(zeros, syn_win, fftsize, "pvsyn");
|
|
write_pv_frame(zeros, syn_frame, fftsize, "pvsyn");
|
|
write_pv_frame(zeros, tmp_frame, fftsize, "pvsyn");
|
|
DBG*/
|
|
int sum_count = fftsize - syn_hopsize;
|
|
for (i = 0; i < sum_count; i++) {
|
|
frame_next[i] += syn_win[i] * syn_frame[i];
|
|
}
|
|
for (/* continue from i */; i < fftsize; i++) {
|
|
DD assert(frame_next[i] == 0);
|
|
frame_next[i] = syn_win[i] * syn_frame[i];
|
|
}
|
|
frame_next += syn_hopsize;
|
|
pv->frame_next = frame_next;
|
|
}
|
|
|
|
|
|
void update_position_queue(PV *pv, float *ana_center)
|
|
{
|
|
int fftsize = pv->fftsize;
|
|
float *frame_next = pv->frame_next;
|
|
int syn_hopsize = pv->syn_hopsize;
|
|
float *out_next = pv->out_next;
|
|
|
|
// put the positions of the processed frame into the queue
|
|
if (pv->first_time) {
|
|
// insert a special starting correspondence:
|
|
pv->pos_buffer_rear->ana_pos = lroundf(-pv->ratio * fftsize / 2);
|
|
pv->pos_buffer_rear->syn_pos = 0;
|
|
pv->pos_buffer_rear++;
|
|
}
|
|
// center of analysis window was at ana_center
|
|
// input_total corresponds to input_rear
|
|
pv->pos_buffer_rear->ana_pos =
|
|
pv->input_total - (long) (pv->input_rear - ana_center);
|
|
// output window center was at frame_next - syn_hopsize + fftsize / 2
|
|
// output_total corresponds to out_next
|
|
pv->pos_buffer_rear->syn_pos = pv->output_total +
|
|
(long) ((frame_next - syn_hopsize + (fftsize / 2)) - out_next);
|
|
|
|
// set pos_buffer_rear to new last element in the queue
|
|
pv->pos_buffer_rear++;
|
|
if (pv->pos_buffer_rear == pv->pos_buffer + pv->queue_length) {
|
|
pv->pos_buffer_rear = pv->pos_buffer; // wrap
|
|
}
|
|
// if queue is too full, remove first element at pos_buffer_head
|
|
if (pv->pos_buffer_head == pv->pos_buffer_rear) {
|
|
pv->pos_buffer_head++;
|
|
if (pv->pos_buffer_head == pv->pos_buffer + pv->queue_length) {
|
|
pv->pos_buffer_head = pv->pos_buffer; // wrap
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
float *finish_output(PV *pv)
|
|
{
|
|
assert(pv->frame_next - pv->out_next >= pv->blocksize);
|
|
pv->phase = PV_START;
|
|
// remember the current output:
|
|
float *block = pv->out_next;
|
|
// update out_next where next output block will be computed
|
|
pv->out_next = block + pv->blocksize;
|
|
pv->output_total += pv->blocksize;
|
|
D printf(" return offset %ld = %g\n",
|
|
(long) (pv->out_next - pv->output_buffer), *(pv->out_next));
|
|
/* DEBUG: To produce a 32767-sample-long sawtooth from 0 to 1 (roughly)
|
|
* as output, uncomment the following loop. This might be the first step
|
|
* in debugging. If you do not get a smoothly increasing ramp for 32K
|
|
* samples, then you are not handling the output of cmupv properly.
|
|
*/
|
|
/* for (int i = 0; i < pv->blocksize; i++) {
|
|
* block[i] = ((pv->output_total - pv->blocksize + i) % 32767) / 32768.0;
|
|
* }
|
|
*/
|
|
return block;
|
|
}
|
|
|
|
|
|
float *pv_get_output(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
assert(pv->phase == PV_GOT_INPUT);
|
|
#ifndef NDEBUG
|
|
long blocksize = pv->blocksize;
|
|
float *out_next = pv->out_next;
|
|
#endif
|
|
int fftsize = pv->fftsize;
|
|
int frames_to_compute = pv->frames_to_compute;
|
|
int syn_hopsize = pv->syn_hopsize;
|
|
float *ana_win = pv->ana_win;
|
|
float ratio = pv->ratio;
|
|
float *input_head = pv->input_head;
|
|
float *ana_frame = pv->ana_frame;
|
|
float *ana_center;
|
|
|
|
int ana_hopsize = lroundf(syn_hopsize * ratio);
|
|
if (ana_hopsize > pv->max_ana_hopsize) {
|
|
ana_hopsize = pv->max_ana_hopsize;
|
|
}
|
|
|
|
// compute frames and add them to the output_buffer until there
|
|
// are blocksize samples ready to deliver
|
|
D printf("pv_get_output: frames_to_compute %d\n", frames_to_compute);
|
|
int frame;
|
|
for (frame = 0; frame < frames_to_compute; frame++) {
|
|
assert(pv->frame_next - out_next < blocksize);
|
|
int i;
|
|
for (i = 0; i < fftsize; i++) // get and window the buffer
|
|
ana_frame[i] = input_head[i] * ana_win[i];
|
|
ana_center = input_head + fftsize / 2;
|
|
D printf(" mid ana_frame->%g at %ld\n", *ana_center,
|
|
(long) (ana_center - pv->input_buffer));
|
|
if (frame < frames_to_compute - 1) {
|
|
input_head += ana_hopsize; // get ready for next iteration,
|
|
// but on the last iteration, we do not add hopsize because
|
|
// ratio might change. ana_hopsize is added in get_input_count()
|
|
// to set up the next analysis frame location
|
|
} else {
|
|
// on the last iteration, update pv->input_head.
|
|
// Equivalently, after the for loop we could do
|
|
// if (frames_to_compute > 0)
|
|
// pv->input_head += ana_hopsize * (frames_to_compute - 1);
|
|
pv->input_head = input_head;
|
|
}
|
|
compute_one_frame(pv, ana_hopsize);
|
|
update_position_queue(pv, ana_center);
|
|
// first_time is not reset in update_position_queue where it is tested
|
|
// because it is also used in pv_get_output2, which does not call
|
|
// update_position_queue()
|
|
pv->first_time = FALSE;
|
|
}
|
|
return finish_output(pv);
|
|
}
|
|
|
|
|
|
float *pv_get_output2(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV *)x;
|
|
assert(pv->phase == PV_START);
|
|
|
|
long blocksize = pv->blocksize;
|
|
int fftsize = pv->fftsize;
|
|
float *out_next = pv->out_next;
|
|
float *output_buffer = pv->output_buffer;
|
|
float *ana_frame = pv->ana_frame;
|
|
float *ana_win = pv->ana_win;
|
|
long output_buffer_len = pv->output_buffer_len;
|
|
|
|
D printf("pv_get_output2: blocksize %ld frame_next %ld "
|
|
"out_next %ld buffer offset %ld\n",
|
|
blocksize, (long) (pv->frame_next - output_buffer),
|
|
(long) (out_next - output_buffer),
|
|
(long) (pv->output_total - (out_next - output_buffer)));
|
|
|
|
// To produce blocksize, how many samples do we need? The next
|
|
// sample to output is at out_next, and the next frame will be
|
|
// addded at frame_next, so we've already computed
|
|
// out_next - frame_next:
|
|
while (blocksize > (pv->frame_next - out_next)) {
|
|
long out_cnt = (long) (pv->output_total + (pv->frame_next - out_next) +
|
|
fftsize / 2);
|
|
// if there's no room in the output buffer, shift the samples.
|
|
// This is done here to avoid extra work (sometimes pv_get_output2
|
|
// can be called and the samples are already in the buffer so there's
|
|
// no need to shift.
|
|
if (pv->frame_next + fftsize > output_buffer + output_buffer_len) {
|
|
long shift = (long) (out_next - output_buffer);
|
|
D printf("shift output by %ld\n", shift);
|
|
memmove(output_buffer, out_next,
|
|
(output_buffer_len - shift) * sizeof(*output_buffer));
|
|
/* for debugging, fill the end with zero. When we write (rather
|
|
than add) to the buffer, assert that we're over-writing zeros */
|
|
DD ZERO(output_buffer + output_buffer_len - shift, shift);
|
|
out_next = output_buffer;
|
|
pv->out_next = output_buffer;
|
|
pv->frame_next -= shift;
|
|
}
|
|
int ana_hopsize = (*pv->callback)(out_cnt, ana_frame,
|
|
fftsize, pv->rock);
|
|
/* DEBUG - To check input, the following commented code will
|
|
write each analysis frame as a file. The analysis frame is
|
|
prefixed with zeros so that it will be placed at the right
|
|
time, but this generates N^2 samples, so only the first 20
|
|
frames are written */
|
|
/*DBG
|
|
write_pv_frame(out_cnt - fftsize / 2, ana_frame, fftsize, "pvana");
|
|
DBG*/
|
|
int i;
|
|
for (i = 0; i < fftsize; i++) ana_frame[i] *= ana_win[i];
|
|
compute_one_frame(pv, ana_hopsize);
|
|
pv->first_time = FALSE;
|
|
D printf("pv_get_output2: blocksize %ld frame_next %ld "
|
|
"out_next %ld buffer offset %ld\n",
|
|
blocksize, (long) (pv->frame_next - output_buffer),
|
|
(long) (out_next - output_buffer),
|
|
(long) (pv->output_total - (out_next - output_buffer)));
|
|
}
|
|
D printf("pv_get_output2 returning at offset %ld abs %ld\n",
|
|
(long) (pv->out_next - pv->output_buffer), pv->output_total);
|
|
/*DBG out_next position is output_total, so if we subtract out_next - output_buffer,
|
|
we get the absolute position of the output_buffer
|
|
write_pv_frame(pv->output_total - (pv->out_next - pv->output_buffer),
|
|
pv->output_buffer, pv->output_buffer_len, "pvbuf");
|
|
DBG*/
|
|
return finish_output(pv);
|
|
}
|