Skip to content

Commit 12df2de

Browse files
committed
Adding RTCD for DNN code
Starting with compute_linear()
1 parent 58923f6 commit 12df2de

31 files changed

+525
-158
lines changed

Makefile.am

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,30 @@ if CPU_X86
5050
if HAVE_RTCD
5151
CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
5252
SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
53+
if ENABLE_DEEP_PLC
54+
LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
55+
endif
5356
endif
5457
if HAVE_SSE
5558
CELT_SOURCES += $(CELT_SOURCES_SSE)
5659
endif
5760
if HAVE_SSE2
5861
CELT_SOURCES += $(CELT_SOURCES_SSE2)
62+
if ENABLE_DEEP_PLC
63+
LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
64+
endif
5965
endif
6066
if HAVE_SSE4_1
6167
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
68+
if ENABLE_DEEP_PLC
69+
LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
70+
endif
6271
endif
6372
if HAVE_AVX2
6473
CELT_SOURCES += $(CELT_SOURCES_AVX2)
74+
if ENABLE_DEEP_PLC
75+
LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
76+
endif
6577
endif
6678
endif
6779

@@ -398,19 +410,22 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
398410
endif
399411

400412
if HAVE_SSE2
401-
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
413+
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
414+
$(DNN_SOURCES_SSE2:.c=.lo)
402415
$(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
403416
endif
404417

405418
if HAVE_SSE4_1
406419
SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
420+
$(DNN_SOURCES_SSE4_1:.c=.lo) \
407421
$(SILK_SOURCES_SSE4_1:.c=.lo) \
408422
$(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
409423
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
410424
endif
411425

412426
if HAVE_AVX2
413-
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
427+
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
428+
$(DNN_SOURCES_AVX2:.c=.lo)
414429
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
415430
endif
416431

celt/x86/x86cpu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
# endif
4848

4949
# if defined(OPUS_X86_MAY_HAVE_AVX2)
50-
# define MAY_HAVE_AVX2(name) name ## _avx
50+
# define MAY_HAVE_AVX2(name) name ## _avx2
5151
# else
5252
# define MAY_HAVE_AVX2(name) name ## _c
5353
# endif

dnn/dred_rdovae_dec.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,33 +42,35 @@ static void conv1_cond_init(float *mem, int len, int dilation, int *init)
4242
*init = 1;
4343
}
4444

45-
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents)
45+
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
4646
{
4747
int i;
4848
RDOVAEDecState dec;
4949
memset(&dec, 0, sizeof(dec));
50-
dred_rdovae_dec_init_states(&dec, model, state);
50+
dred_rdovae_dec_init_states(&dec, model, state, arch);
5151
for (i = 0; i < 2*nb_latents; i += 2)
5252
{
5353
dred_rdovae_decode_qframe(
5454
&dec,
5555
model,
5656
&features[2*i*DRED_NUM_FEATURES],
57-
&latents[(i/2)*DRED_LATENT_DIM]);
57+
&latents[(i/2)*DRED_LATENT_DIM],
58+
arch);
5859
}
5960
}
6061

6162
void dred_rdovae_dec_init_states(
6263
RDOVAEDecState *h, /* io: state buffer handle */
6364
const RDOVAEDec *model,
64-
const float *initial_state /* i: initial state */
65+
const float *initial_state, /* i: initial state */
66+
int arch
6567
)
6668
{
6769
float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
6870
float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
6971
int counter=0;
70-
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH);
71-
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH);
72+
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
73+
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
7274
OPUS_COPY(h->gru1_state, state_init, DEC_GRU1_STATE_SIZE);
7375
counter += DEC_GRU1_STATE_SIZE;
7476
OPUS_COPY(h->gru2_state, &state_init[counter], DEC_GRU2_STATE_SIZE);
@@ -86,51 +88,52 @@ void dred_rdovae_decode_qframe(
8688
RDOVAEDecState *dec_state, /* io: state buffer handle */
8789
const RDOVAEDec *model,
8890
float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
89-
const float *input /* i: latent vector */
91+
const float *input, /* i: latent vector */
92+
int arch
9093
)
9194
{
9295
float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
9396
+ DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
9497
int output_index = 0;
9598

9699
/* run decoder stack and concatenate output in buffer */
97-
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH);
100+
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
98101
output_index += DEC_DENSE1_OUT_SIZE;
99102

100-
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer);
103+
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
101104
OPUS_COPY(&buffer[output_index], dec_state->gru1_state, DEC_GRU1_OUT_SIZE);
102105
output_index += DEC_GRU1_OUT_SIZE;
103106
conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
104-
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
107+
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
105108
output_index += DEC_CONV1_OUT_SIZE;
106109

107-
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer);
110+
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
108111
OPUS_COPY(&buffer[output_index], dec_state->gru2_state, DEC_GRU2_OUT_SIZE);
109112
output_index += DEC_GRU2_OUT_SIZE;
110113
conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
111-
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH);
114+
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
112115
output_index += DEC_CONV2_OUT_SIZE;
113116

114-
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer);
117+
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
115118
OPUS_COPY(&buffer[output_index], dec_state->gru3_state, DEC_GRU3_OUT_SIZE);
116119
output_index += DEC_GRU3_OUT_SIZE;
117120
conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
118-
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH);
121+
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
119122
output_index += DEC_CONV3_OUT_SIZE;
120123

121-
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer);
124+
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
122125
OPUS_COPY(&buffer[output_index], dec_state->gru4_state, DEC_GRU4_OUT_SIZE);
123126
output_index += DEC_GRU4_OUT_SIZE;
124127
conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
125-
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH);
128+
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
126129
output_index += DEC_CONV4_OUT_SIZE;
127130

128-
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer);
131+
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
129132
OPUS_COPY(&buffer[output_index], dec_state->gru5_state, DEC_GRU5_OUT_SIZE);
130133
output_index += DEC_GRU5_OUT_SIZE;
131134
conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
132-
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH);
135+
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
133136
output_index += DEC_CONV5_OUT_SIZE;
134137

135-
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR);
138+
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
136139
}

dnn/dred_rdovae_dec.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ struct RDOVAEDecStruct {
4646
float conv5_state[DEC_CONV5_STATE_SIZE];
4747
};
4848

49-
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state);
50-
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z);
51-
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents);
49+
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
50+
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
51+
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
5252

5353
#endif

dnn/dred_rdovae_enc.c

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ void dred_rdovae_encode_dframe(
5050
const RDOVAEEnc *model,
5151
float *latents, /* o: latent vector */
5252
float *initial_state, /* o: initial state */
53-
const float *input /* i: double feature frame (concatenated) */
53+
const float *input, /* i: double feature frame (concatenated) */
54+
int arch
5455
)
5556
{
5657
float padded_latents[DRED_PADDED_LATENT_DIM];
@@ -61,49 +62,49 @@ void dred_rdovae_encode_dframe(
6162
int output_index = 0;
6263

6364
/* run encoder stack and concatenate output in buffer*/
64-
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH);
65+
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
6566
output_index += ENC_DENSE1_OUT_SIZE;
6667

67-
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer);
68+
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
6869
OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
6970
output_index += ENC_GRU1_OUT_SIZE;
7071
conv1_cond_init(enc_state->conv1_state, output_index, 1, &enc_state->initialized);
71-
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
72+
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
7273
output_index += ENC_CONV1_OUT_SIZE;
7374

74-
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer);
75+
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
7576
OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
7677
output_index += ENC_GRU2_OUT_SIZE;
7778
conv1_cond_init(enc_state->conv2_state, output_index, 2, &enc_state->initialized);
78-
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH);
79+
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
7980
output_index += ENC_CONV2_OUT_SIZE;
8081

81-
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer);
82+
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
8283
OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
8384
output_index += ENC_GRU3_OUT_SIZE;
8485
conv1_cond_init(enc_state->conv3_state, output_index, 2, &enc_state->initialized);
85-
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH);
86+
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
8687
output_index += ENC_CONV3_OUT_SIZE;
8788

88-
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer);
89+
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
8990
OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
9091
output_index += ENC_GRU4_OUT_SIZE;
9192
conv1_cond_init(enc_state->conv4_state, output_index, 2, &enc_state->initialized);
92-
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH);
93+
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
9394
output_index += ENC_CONV4_OUT_SIZE;
9495

95-
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer);
96+
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
9697
OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
9798
output_index += ENC_GRU5_OUT_SIZE;
9899
conv1_cond_init(enc_state->conv5_state, output_index, 2, &enc_state->initialized);
99-
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH);
100+
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
100101
output_index += ENC_CONV5_OUT_SIZE;
101102

102-
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR);
103+
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
103104
OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);
104105

105106
/* next, calculate initial state */
106-
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH);
107-
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR);
107+
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
108+
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
108109
OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
109110
}

dnn/dred_rdovae_enc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ struct RDOVAEEncStruct {
4646
float conv5_state[2*ENC_CONV5_STATE_SIZE];
4747
};
4848

49-
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input);
49+
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
5050

5151

5252
#endif

dnn/dump_data.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "lpcnet.h"
4343
#include "lpcnet_private.h"
4444
#include "os_support.h"
45+
#include "cpu_support.h"
4546

4647

4748
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
@@ -135,7 +136,9 @@ int main(int argc, char **argv) {
135136
FILE *fnoise = NULL;
136137
float noise_gain = 0;
137138
long noise_size=0;
139+
int arch;
138140
srand(getpid());
141+
arch = opus_select_arch();
139142
st = lpcnet_encoder_create();
140143
argv0=argv[0];
141144
if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
@@ -244,7 +247,7 @@ int main(int argc, char **argv) {
244247
for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
245248
/* PCM is delayed by 1/2 frame to make the features centered on the frames. */
246249
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
247-
compute_frame_features(st, x);
250+
compute_frame_features(st, x, arch);
248251

249252
if (fpcm) {
250253
compute_noise(noisebuf, noise_std);

0 commit comments

Comments
 (0)