Skip to content

Commit

Permalink
Adding RTCD for DNN code
Browse files Browse the repository at this point in the history
Starting with compute_linear()
  • Loading branch information
jmvalin committed Nov 14, 2023
1 parent 58923f6 commit 12df2de
Show file tree
Hide file tree
Showing 31 changed files with 525 additions and 158 deletions.
19 changes: 17 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,30 @@ if CPU_X86
if HAVE_RTCD
CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
if HAVE_SSE
LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
endif
endif
if HAVE_SSE
CELT_SOURCES += $(CELT_SOURCES_SSE)
endif
if HAVE_SSE2
CELT_SOURCES += $(CELT_SOURCES_SSE2)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
endif
endif
if HAVE_SSE4_1
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
endif
endif
if HAVE_AVX2
CELT_SOURCES += $(CELT_SOURCES_AVX2)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
endif
endif
endif

Expand Down Expand Up @@ -398,19 +410,22 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
endif

if HAVE_SSE2
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
$(DNN_SOURCES_SSE2:.c=.lo)
$(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
endif

if HAVE_SSE4_1
SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
$(DNN_SOURCES_SSE4_1:.c=.lo) \
$(SILK_SOURCES_SSE4_1:.c=.lo) \
$(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
endif

if HAVE_AVX2
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
$(DNN_SOURCES_AVX2:.c=.lo)
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
endif

Expand Down
2 changes: 1 addition & 1 deletion celt/x86/x86cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
# endif

# if defined(OPUS_X86_MAY_HAVE_AVX2)
# define MAY_HAVE_AVX2(name) name ## _avx
# define MAY_HAVE_AVX2(name) name ## _avx2
# else
# define MAY_HAVE_AVX2(name) name ## _c
# endif
Expand Down
41 changes: 22 additions & 19 deletions dnn/dred_rdovae_dec.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,33 +42,35 @@ static void conv1_cond_init(float *mem, int len, int dilation, int *init)
*init = 1;
}

void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents)
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
{
int i;
RDOVAEDecState dec;
memset(&dec, 0, sizeof(dec));
dred_rdovae_dec_init_states(&dec, model, state);
dred_rdovae_dec_init_states(&dec, model, state, arch);
for (i = 0; i < 2*nb_latents; i += 2)
{
dred_rdovae_decode_qframe(
&dec,
model,
&features[2*i*DRED_NUM_FEATURES],
&latents[(i/2)*DRED_LATENT_DIM]);
&latents[(i/2)*DRED_LATENT_DIM],
arch);
}
}

void dred_rdovae_dec_init_states(
RDOVAEDecState *h, /* io: state buffer handle */
const RDOVAEDec *model,
const float *initial_state /* i: initial state */
const float *initial_state, /* i: initial state */
int arch
)
{
float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
int counter=0;
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH);
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH);
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
OPUS_COPY(h->gru1_state, state_init, DEC_GRU1_STATE_SIZE);
counter += DEC_GRU1_STATE_SIZE;
OPUS_COPY(h->gru2_state, &state_init[counter], DEC_GRU2_STATE_SIZE);
Expand All @@ -86,51 +88,52 @@ void dred_rdovae_decode_qframe(
RDOVAEDecState *dec_state, /* io: state buffer handle */
const RDOVAEDec *model,
float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
const float *input /* i: latent vector */
const float *input, /* i: latent vector */
int arch
)
{
float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
+ DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
int output_index = 0;

/* run encoder stack and concatenate output in buffer */
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH);
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
output_index += DEC_DENSE1_OUT_SIZE;

compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer);
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
OPUS_COPY(&buffer[output_index], dec_state->gru1_state, DEC_GRU1_OUT_SIZE);
output_index += DEC_GRU1_OUT_SIZE;
conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV1_OUT_SIZE;

compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer);
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
OPUS_COPY(&buffer[output_index], dec_state->gru2_state, DEC_GRU2_OUT_SIZE);
output_index += DEC_GRU2_OUT_SIZE;
conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV2_OUT_SIZE;

compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer);
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
OPUS_COPY(&buffer[output_index], dec_state->gru3_state, DEC_GRU3_OUT_SIZE);
output_index += DEC_GRU3_OUT_SIZE;
conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV3_OUT_SIZE;

compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer);
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
OPUS_COPY(&buffer[output_index], dec_state->gru4_state, DEC_GRU4_OUT_SIZE);
output_index += DEC_GRU4_OUT_SIZE;
conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV4_OUT_SIZE;

compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer);
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
OPUS_COPY(&buffer[output_index], dec_state->gru5_state, DEC_GRU5_OUT_SIZE);
output_index += DEC_GRU5_OUT_SIZE;
conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV5_OUT_SIZE;

compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR);
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
}
6 changes: 3 additions & 3 deletions dnn/dred_rdovae_dec.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ struct RDOVAEDecStruct {
float conv5_state[DEC_CONV5_STATE_SIZE];
};

void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state);
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z);
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents);
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);

#endif
31 changes: 16 additions & 15 deletions dnn/dred_rdovae_enc.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ void dred_rdovae_encode_dframe(
const RDOVAEEnc *model,
float *latents, /* o: latent vector */
float *initial_state, /* o: initial state */
const float *input /* i: double feature frame (concatenated) */
const float *input, /* i: double feature frame (concatenated) */
int arch
)
{
float padded_latents[DRED_PADDED_LATENT_DIM];
Expand All @@ -61,49 +62,49 @@ void dred_rdovae_encode_dframe(
int output_index = 0;

/* run encoder stack and concatenate output in buffer */
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH);
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
output_index += ENC_DENSE1_OUT_SIZE;

compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer);
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
output_index += ENC_GRU1_OUT_SIZE;
conv1_cond_init(enc_state->conv1_state, output_index, 1, &enc_state->initialized);
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += ENC_CONV1_OUT_SIZE;

compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer);
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
output_index += ENC_GRU2_OUT_SIZE;
conv1_cond_init(enc_state->conv2_state, output_index, 2, &enc_state->initialized);
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH);
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
output_index += ENC_CONV2_OUT_SIZE;

compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer);
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
output_index += ENC_GRU3_OUT_SIZE;
conv1_cond_init(enc_state->conv3_state, output_index, 2, &enc_state->initialized);
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH);
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
output_index += ENC_CONV3_OUT_SIZE;

compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer);
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
output_index += ENC_GRU4_OUT_SIZE;
conv1_cond_init(enc_state->conv4_state, output_index, 2, &enc_state->initialized);
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH);
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
output_index += ENC_CONV4_OUT_SIZE;

compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer);
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
output_index += ENC_GRU5_OUT_SIZE;
conv1_cond_init(enc_state->conv5_state, output_index, 2, &enc_state->initialized);
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH);
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
output_index += ENC_CONV5_OUT_SIZE;

compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR);
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);

/* next, calculate initial state */
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH);
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR);
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
}
2 changes: 1 addition & 1 deletion dnn/dred_rdovae_enc.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct RDOVAEEncStruct {
float conv5_state[2*ENC_CONV5_STATE_SIZE];
};

void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input);
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);


#endif
5 changes: 4 additions & 1 deletion dnn/dump_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "lpcnet.h"
#include "lpcnet_private.h"
#include "os_support.h"
#include "cpu_support.h"


static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Expand Down Expand Up @@ -135,7 +136,9 @@ int main(int argc, char **argv) {
FILE *fnoise = NULL;
float noise_gain = 0;
long noise_size=0;
int arch;
srand(getpid());
arch = opus_select_arch();
st = lpcnet_encoder_create();
argv0=argv[0];
if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
Expand Down Expand Up @@ -244,7 +247,7 @@ int main(int argc, char **argv) {
for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
/* PCM is delayed by 1/2 frame to make the features centered on the frames. */
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
compute_frame_features(st, x);
compute_frame_features(st, x, arch);

if (fpcm) {
compute_noise(noisebuf, noise_std);
Expand Down
Loading

0 comments on commit 12df2de

Please sign in to comment.