497 lines
18 KiB
C
497 lines
18 KiB
C
/* convolve.c -- implements fast (FFT-based, block-partitioned) convolution */
|
|
/*
|
|
* Note: this code is mostly generated by translate.lsp (see convolve.tran
|
|
* in the tran directory), but it has been modified by hand to extend the
|
|
* stop time to include the "tail" of the convolution beyond the length
|
|
* of the first parameter.
|
|
*/
|
|
|
|
/* Original convolve.c modified to do fast convolution. Here are some
|
|
* notes:
|
|
* The first arg is arbitrary length. The second arg is the impulse
|
|
* response, which is converted into a table. The FFT size will be
|
|
* limited to 64K, which allows convolution with up to 32K samples.
|
|
* For longer impulse responses, we'll have to do convolutions one
|
|
* 32K block at a time. I considered just limiting the convolution
|
|
* size and handling longer impulse responses in Nyquist XLISP code,
|
|
* but that would require taking FFT's of each input block multiple
|
|
* times. Here, we save the FFT's and reuse them, which should gain
|
|
* a factor of 2 in speed (we still have to inverse FFT each block
|
|
* after multiplication, which should take 1/2 the time of doing
|
|
* FFT/inverse-FFT on each block).
|
|
*
|
|
* The fast convolution works like this:
|
|
* inputs are x_snd and h_snd.
|
|
* Compute the length of h_snd in samples.
|
|
* Set fft_size = MAX_FFT_SIZE
|
|
* If length <= MAX_FFT_SIZE / 4 then
|
|
* set fft_size = (round length to power of 2) * 2
|
|
* set N = fft_size/2
|
|
* Set h_len = (length rounded up to multiple of fft_size/2) * 2
|
|
* Let L = h_len/ fft_size
|
|
* Allocate H of h_len floats
|
|
* Iterate over i from 0 to L-1:
|
|
* Copy ht with zero fill into H[i] of size fft_size,
|
|
* where each H[i] of size fft_size is filled with
|
|
* fft_size/2 samples (except for the last H[i])
|
|
* Compute FFT of H[i] in place (FFT size is fft_size)
|
|
* Allocate X of h_len floats. This represents the history
|
|
* of x_snd, which is initially all zero, so the FFT, X is all zero
|
|
* Allocate output buffers Y and R, each of size fft_size
|
|
* Iterate over j (i.e. run this to generate MAX_CONVOLVE_LEN
|
|
* samples); then j = (j + 1) mod L.
|
|
* Copy 2nd half of R to first half and zero the 2nd half.
|
|
* Note: the first time does nothing because R is initially
|
|
* filled with zeros
|
|
* Copy fft_size/2 samples of x_snd into X[j],
|
|
* where X[j] is of size fft_size and filled with
|
|
* N samples (except when x_snd terminates)
|
|
* Zero fill X[j]
|
|
* Compute FFT of X[j] in place.
|
|
* Iterate k = 0 to L-1
|
|
* Multiply X[(j-k) mod L] by H[k] (result goes into Y).
|
|
* Compute IFFT of Y in place. Y is now time domain convolution
|
|
* of two blocks of samples.
|
|
* Add Y to R.
|
|
* Now N samples of R can be output.
|
|
* For simplicity, we'll keep processing x_snd input even after x_snd
|
|
* terminates. This will avoid special cases where we do not need all
|
|
* of X[j] at the end of the convolution.
|
|
*
|
|
* Length of output is length of x input + length of h
|
|
*/
|
|
|
|
// You can turn on debugging output with: #define D if (1)
|
|
#define D if (0)
|
|
|
|
#define MAX_IR_LEN 4000000 /* maximum impulse response length */
|
|
#define MAX_LOG_FFT_SIZE 16 /* maximum fft size for convolution */
|
|
//#define MAX_LOG_FFT_SIZE 4 /* maximum fft size for convolution */
|
|
#define _USE_MATH_DEFINES 1 /* for Visual C++ to get M_LN2 */
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include "stdio.h"
|
|
#ifndef mips
|
|
#include "stdlib.h"
|
|
#endif
|
|
#include "xlisp.h"
|
|
#include "sound.h"
|
|
#include "samples.h"
|
|
|
|
#include "falloc.h"
|
|
#include "cext.h"
|
|
#include "fftlib.h"
|
|
#include "fftext.h"
|
|
#include "convolve.h"
|
|
|
|
void convolve_free(snd_susp_type a_susp);
|
|
|
|
|
|
typedef struct convolve_susp_struct {
|
|
snd_susp_node susp;
|
|
int64_t terminate_cnt;
|
|
boolean know_end_of_x;
|
|
boolean logically_stopped;
|
|
sound_type x_snd;
|
|
int x_snd_cnt;
|
|
sample_block_values_type x_snd_ptr;
|
|
|
|
sample_type *X; // the FFTs of x_snd
|
|
int j; // which block are we processing? 0 <= j < L
|
|
sample_type *H; // the FFTs of h_snd
|
|
sample_type *Y; // product of X*H where we inverse FFT
|
|
int h_snd_len; // true length of h_snd in samples
|
|
int N; // length of convolution, FFTs are of size 2*N
|
|
int M; // log2 of 2*N, the FFT size
|
|
int L; // number of blocks: h_len / (2*N)
|
|
sample_type *R; // result buffer where output is summed
|
|
sample_type *R_current; // pointer to next sample to output
|
|
} convolve_susp_node, *convolve_susp_type;
|
|
|
|
/*
|
|
void h_reverse(sample_type *h, long len)
|
|
{
|
|
sample_type temp;
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
temp = h[i];
|
|
h[i] = h[len - 1];
|
|
h[len - 1] = temp;
|
|
len--;
|
|
}
|
|
}
|
|
*/
|
|
|
|
void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list)
|
|
{
|
|
convolve_susp_type susp = (convolve_susp_type) a_susp;
|
|
int cnt = 0; /* how many samples computed */
|
|
int togo;
|
|
int n;
|
|
sample_block_type out;
|
|
register sample_block_values_type out_ptr;
|
|
|
|
register sample_block_values_type out_ptr_reg;
|
|
|
|
sample_type *R = susp->R;
|
|
sample_type *R_current;
|
|
int N = susp->N;
|
|
falloc_sample_block(out, "convolve_s_fetch");
|
|
out_ptr = out->samples;
|
|
snd_list->block = out;
|
|
|
|
while (cnt < max_sample_block_len) { /* outer loop */
|
|
/* first compute how many samples to generate in inner loop: */
|
|
/* don't overflow the output sample block: */
|
|
togo = max_sample_block_len - cnt;
|
|
/* if we need output samples, generate them here */
|
|
D printf("test R_current at offset %td\n", susp->R_current - R);
|
|
if (susp->R_current >= R + N) { // true when we output half of R
|
|
int i = 0;
|
|
int k;
|
|
sample_type *Xj = susp->X + susp->j * N * 2;
|
|
sample_type *H = susp->H;
|
|
sample_type *Y = susp->Y;
|
|
int to_copy;
|
|
/* Shift R, zero fill: */
|
|
memcpy(R, R + N, N * sizeof(*R));
|
|
memset(R + N, 0, N * sizeof(*R));
|
|
/* Copy N samples of x_snd into Xj and zero fill to size 2N */
|
|
D printf("Copying N samples of x_snd into Xj at offset %td\n", Xj - susp->X);
|
|
while (i < N) {
|
|
if (susp->x_snd_cnt == 0) {
|
|
susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt);
|
|
if (susp->x_snd->logical_stop_cnt ==
|
|
susp->x_snd->current - susp->x_snd_cnt) {
|
|
min_cnt(&susp->susp.log_stop_cnt, susp->x_snd,
|
|
(snd_susp_type) susp, susp->x_snd_cnt);
|
|
}
|
|
}
|
|
/* This code is not standard. Since we extend the terminate
|
|
* count by susp->h_snd_len, the "standard" call to min_cnt()
|
|
* results in extending the terminate time forever. Instead,
|
|
* we make this code run once only by setting know_end_of_x.
|
|
*/
|
|
if (!susp->know_end_of_x &&
|
|
susp->x_snd_ptr == zero_block->samples) {
|
|
susp->terminate_cnt = susp->x_snd->current - susp->x_snd_cnt;
|
|
/* extend the output to include impulse response */
|
|
susp->terminate_cnt += susp->h_snd_len;
|
|
susp->know_end_of_x = TRUE;
|
|
}
|
|
/* copy no more than the remaining space and no more than
|
|
* the amount remaining in the block
|
|
*/
|
|
to_copy = min(N - i, susp->x_snd_cnt);
|
|
memcpy(Xj + i, susp->x_snd_ptr,
|
|
to_copy * sizeof(*susp->x_snd_ptr));
|
|
susp->x_snd_ptr += to_copy;
|
|
susp->x_snd_cnt -= to_copy;
|
|
i += to_copy;
|
|
}
|
|
/* zero fill to size 2N */
|
|
memset(Xj + N, 0, N * sizeof(Xj[0]));
|
|
D {
|
|
printf("Xj at offset %td: ", Xj - susp->X);
|
|
printf(" %d samples ", susp->N * 2);
|
|
float big = 0.0;
|
|
for (i = 0; i < susp->N * 2; i++) {
|
|
// printf("%g ", Xj[i]);
|
|
big = max(big, fabs(Xj[i]));
|
|
}
|
|
printf("MAX: %g\n", big);
|
|
}
|
|
/* Compute FFT of Xj in place */
|
|
fftInit(susp->M);
|
|
rffts(Xj, susp->M, 1);
|
|
/* convolve pairs of blocks and sum into Y */
|
|
memset(Y, 0, N * sizeof(*Y)); /* initialize sum to zero */
|
|
for (k = 0; k < susp->L; k++) {
|
|
/* Multiply Xj by H (result goes into X) */
|
|
sample_type *X = susp->X + ((susp->L + susp->j - k) % susp->L) * N * 2;
|
|
rspectprod(X, H + k * N * 2, Y, N * 2);
|
|
/* Compute IFFT of Y in place */
|
|
riffts(Y, susp->M, 1);
|
|
/* R += Y */
|
|
D { printf("Output block %d, X offset %td: ", k, X - susp->X);
|
|
printf(" %d samples ", 2 * N);
|
|
float big = 0.0;
|
|
for (i = 0; i < 2 * N; i++) {
|
|
big = max(big, fabs(Y[i]));
|
|
}
|
|
printf("MAX: %g\n", big);
|
|
}
|
|
for (i = 0; i < 2 * N; i++) {
|
|
R[i] += Y[i];
|
|
}
|
|
}
|
|
/* now N samples of R can be output */
|
|
susp->R_current = R;
|
|
D printf("R: %d samples ", susp->N);
|
|
D { float big = 0.0;
|
|
for (i = 0; i < susp->N; i++) {
|
|
// printf("%g ", R[i]);
|
|
big = max(big, fabs(R[i]));
|
|
}
|
|
printf("MAX: %g\n", big);
|
|
}
|
|
susp->j = (susp->j + 1) % susp->L;
|
|
}
|
|
/* compute togo, the number of samples to "compute" */
|
|
/* can't use more than what's left in R. R_current is
|
|
the next sample of R, so what's left is N - (R - R_current) */
|
|
R_current = susp->R_current;
|
|
togo = (int) min(togo, N - (R_current - R));
|
|
|
|
/* don't run past terminate time */
|
|
if (susp->terminate_cnt != UNKNOWN &&
|
|
susp->terminate_cnt <= susp->susp.current + cnt + togo) {
|
|
togo = (int) (susp->terminate_cnt - (susp->susp.current + cnt));
|
|
if (togo == 0) break;
|
|
}
|
|
|
|
/* don't run past logical stop time */
|
|
if (!susp->logically_stopped &&
|
|
susp->susp.log_stop_cnt != UNKNOWN &&
|
|
susp->susp.log_stop_cnt <= susp->susp.current + cnt + togo) {
|
|
togo = (int) (susp->susp.log_stop_cnt - (susp->susp.current + cnt));
|
|
D printf("susp->susp.log_stop_cnt is set to %" PRId64 "\n",
|
|
susp->susp.log_stop_cnt);
|
|
if (togo == 0) break;
|
|
}
|
|
|
|
n = togo;
|
|
out_ptr_reg = out_ptr;
|
|
if (n) do { /* the inner sample computation loop */
|
|
*out_ptr_reg++ = (sample_type) *R_current++;
|
|
} while (--n); /* inner loop */
|
|
|
|
/* using R_current is a bad idea on RS/6000: */
|
|
susp->R_current += togo;
|
|
out_ptr += togo;
|
|
cnt += togo;
|
|
} /* outer loop */
|
|
|
|
/* test for termination */
|
|
if (togo == 0 && cnt == 0) {
|
|
snd_list_terminate(snd_list);
|
|
} else {
|
|
snd_list->block_len = cnt;
|
|
susp->susp.current += cnt;
|
|
}
|
|
/* test for logical stop */
|
|
if (susp->logically_stopped) {
|
|
snd_list->logically_stopped = true;
|
|
} else if (susp->susp.log_stop_cnt == susp->susp.current) {
|
|
susp->logically_stopped = true;
|
|
}
|
|
} /* convolve_s_fetch */
|
|
|
|
|
|
void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list)
|
|
{
|
|
convolve_susp_type susp = (convolve_susp_type) a_susp;
|
|
time_type final_time = susp->susp.t0;
|
|
long n;
|
|
|
|
/* fetch samples from x_snd up to final_time for this block of zeros */
|
|
while ((ROUNDBIG((final_time - susp->x_snd->t0) * susp->x_snd->sr)) >=
|
|
susp->x_snd->current)
|
|
susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt);
|
|
/* convert to normal processing when we hit final_count */
|
|
/* we want each signal positioned at final_time */
|
|
n = (long) ROUNDBIG((final_time - susp->x_snd->t0) * susp->x_snd->sr -
|
|
(susp->x_snd->current - susp->x_snd_cnt));
|
|
susp->x_snd_ptr += n;
|
|
susp_took(x_snd_cnt, n);
|
|
susp->susp.fetch = susp->susp.keep_fetch;
|
|
(*(susp->susp.fetch))(a_susp, snd_list);
|
|
}
|
|
|
|
|
|
void convolve_mark(snd_susp_type a_susp)
|
|
{
|
|
convolve_susp_type susp = (convolve_susp_type) a_susp;
|
|
sound_xlmark(susp->x_snd);
|
|
}
|
|
|
|
|
|
void convolve_free(snd_susp_type a_susp)
|
|
{
|
|
convolve_susp_type susp = (convolve_susp_type) a_susp;
|
|
free(susp->R);
|
|
free(susp->X);
|
|
free(susp->Y);
|
|
free(susp->H);
|
|
sound_unref(susp->x_snd);
|
|
ffree_generic(susp, sizeof(convolve_susp_node), "convolve_free");
|
|
}
|
|
|
|
|
|
void convolve_print_tree(snd_susp_type a_susp, int n)
|
|
{
|
|
convolve_susp_type susp = (convolve_susp_type) a_susp;
|
|
indent(n);
|
|
stdputstr("x_snd:");
|
|
sound_print_tree_1(susp->x_snd, n);
|
|
}
|
|
|
|
void fill_with_samples(sample_type *x, sound_type s, long n)
|
|
{
|
|
/* this is based on snd_fetch in samples.c */
|
|
#define CNT extra[1]
|
|
#define INDEX extra[2]
|
|
#define FIELDS 3
|
|
#define SAMPLES list->block->samples
|
|
int i;
|
|
for (i = 0; i < n; i++) {
|
|
if (!s->extra) { /* this is the first call, so fix up s */
|
|
s->extra = (int64_t *) malloc(sizeof(s->extra[0]) * FIELDS);
|
|
s->extra[0] = sizeof(s->extra[0]) * FIELDS;
|
|
s->CNT = s->INDEX = 0;
|
|
}
|
|
int icnt = (int) s->CNT; /* need this to be int type */
|
|
assert(icnt >= 0);
|
|
if (icnt == s->INDEX) {
|
|
sound_get_next(s, &icnt);
|
|
assert(icnt >= 0);
|
|
s->CNT = icnt; /* save the count back into s->extra */
|
|
s->INDEX = 0;
|
|
}
|
|
x[i] = s->SAMPLES[s->INDEX++] * s->scale;
|
|
assert(x[i] < 2);
|
|
}
|
|
D { float big = 0.0;
|
|
for (i = 0; i < n; i++) {
|
|
big = max(big, fabs(x[i]));
|
|
assert(big < 2);
|
|
}
|
|
printf("fill_with_samples n %ld scale %g max %g\n", n, s->scale, big);
|
|
}
|
|
}
|
|
|
|
|
|
sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd)
|
|
{
|
|
register convolve_susp_type susp;
|
|
rate_type sr = x_snd->sr;
|
|
time_type t0 = x_snd->t0;
|
|
sample_type scale_factor = 1.0F;
|
|
time_type t0_min = t0;
|
|
int64_t h_len;
|
|
int i;
|
|
// assume fft_size is maximal. We fix this later if it is wrong
|
|
long fft_size = 1 << MAX_LOG_FFT_SIZE;
|
|
if (sr != h_snd->sr) {
|
|
xlfail("convolve requires both inputs to have the same sample rates");
|
|
}
|
|
falloc_generic(susp, convolve_susp_node, "snd_make_convolve");
|
|
/* compute the length of h_snd in samples */
|
|
h_len = snd_length(h_snd, MAX_IR_LEN + 1);
|
|
if (h_len > MAX_IR_LEN) {
|
|
char emsg[100];
|
|
sprintf(emsg, "convolve maximum impulse length is %d", MAX_IR_LEN);
|
|
xlfail(emsg);
|
|
}
|
|
/* len is the impulse response length;
|
|
* the FFT size is at least double that */
|
|
if (h_len <= fft_size / 4) {
|
|
/* compute log-base-2(h_len): */;
|
|
double log_len = log((double) h_len) / M_LN2;
|
|
int log_len_int = (int) log_len;
|
|
if (log_len_int != log_len) log_len_int++; /* round up to power of 2 */
|
|
susp->M = log_len_int + 1;
|
|
} else {
|
|
susp->M = MAX_LOG_FFT_SIZE;
|
|
}
|
|
fft_size = (1 << susp->M);
|
|
D printf("fft_size %ld\n", fft_size);
|
|
susp->N = fft_size / 2;
|
|
// round h_len up to multiple of susp->N and multiply by 2
|
|
susp->h_snd_len = (int) h_len;
|
|
h_len = ((h_len + susp->N - 1) / susp->N) * susp->N * 2;
|
|
susp->L = (int) (h_len / fft_size);
|
|
// allocate memory
|
|
susp->H = (sample_type *) calloc((size_t) h_len, sizeof(susp->H[0]));
|
|
if (!susp->H) {
|
|
xlfail("memory allocation failure in convolve");
|
|
}
|
|
for (i = 0; i < susp->L; i++) {
|
|
/* copy fft_size/2 samples into each H[i] */
|
|
fill_with_samples(susp->H + i * susp->N * 2, h_snd, susp->N);
|
|
}
|
|
for (i = 0; i < susp->L; i++) {
|
|
int j;
|
|
float *H = susp->H + i * susp->N * 2;
|
|
D { printf("H_%d at %td: ", i, H - susp->H);
|
|
printf("%d samples ", susp->N * 2);
|
|
float big = 0.0;
|
|
for (j = 0; j < susp->N * 2; j++) {
|
|
big = max(big, fabs(H[j]));
|
|
assert(big < 2);
|
|
// printf("%g ", H[j]);
|
|
}
|
|
printf("big %g\n", big);
|
|
}
|
|
}
|
|
sound_unref(h_snd);
|
|
h_snd = NULL;
|
|
/* remaining N samples are already zero-filled */
|
|
if (fftInit(susp->M)) {
|
|
free(susp->H);
|
|
xlfail("fft initialization error in convolve");
|
|
}
|
|
/* take the FFT of each block of the impulse response */
|
|
for (i = 0; i < susp->L; i++) {
|
|
rffts(susp->H + i * susp->N * 2, susp->M, 1);
|
|
}
|
|
susp->X = (sample_type *) calloc((size_t) h_len, sizeof(susp->X[0]));
|
|
susp->R = (sample_type *) calloc(fft_size, sizeof(susp->R[0]));
|
|
susp->Y = (sample_type *) calloc(fft_size, sizeof(susp->Y[0]));
|
|
if (!susp->X || !susp->R || !susp->Y) {
|
|
free(susp->H);
|
|
if (susp->X) free(susp->X);
|
|
if (susp->R) free(susp->R);
|
|
if (susp->Y) free(susp->Y);
|
|
xlfail("memory allocation failed in convolve");
|
|
}
|
|
susp->R_current = susp->R + susp->N;
|
|
susp->susp.fetch = &convolve_s_fetch;
|
|
susp->terminate_cnt = UNKNOWN;
|
|
susp->know_end_of_x = FALSE;
|
|
/* handle unequal start times, if any */
|
|
if (t0 < x_snd->t0) sound_prepend_zeros(x_snd, t0);
|
|
/* minimum start time over all inputs: */
|
|
t0_min = min(x_snd->t0, t0);
|
|
/* how many samples to toss before t0: */
|
|
susp->susp.toss_cnt = (long) ((t0 - t0_min) * sr + 0.5);
|
|
if (susp->susp.toss_cnt > 0) {
|
|
susp->susp.keep_fetch = susp->susp.fetch;
|
|
susp->susp.fetch = convolve_toss_fetch;
|
|
}
|
|
|
|
/* initialize susp state */
|
|
susp->susp.free = convolve_free;
|
|
susp->susp.sr = sr;
|
|
susp->susp.t0 = t0;
|
|
susp->susp.mark = convolve_mark;
|
|
susp->susp.print_tree = convolve_print_tree;
|
|
susp->susp.name = "convolve";
|
|
susp->logically_stopped = false;
|
|
susp->susp.log_stop_cnt = logical_stop_cnt_cvt(x_snd);
|
|
susp->susp.current = 0;
|
|
susp->x_snd = x_snd;
|
|
susp->x_snd_cnt = 0;
|
|
susp->j = 0;
|
|
return sound_create((snd_susp_type)susp, t0, sr, scale_factor);
|
|
}
|
|
|
|
|
|
sound_type snd_convolve(sound_type x_snd, sound_type h_snd)
|
|
{
|
|
sound_type x_snd_copy = sound_copy(x_snd);
|
|
sound_type h_snd_copy = sound_copy(h_snd);
|
|
return snd_make_convolve(x_snd_copy, h_snd_copy);
|
|
}
|