|
@@ -3,11 +3,96 @@
|
|
|
#include <stdio.h>
|
|
#include <stdio.h>
|
|
|
#include <stdlib.h>
|
|
#include <stdlib.h>
|
|
|
#include <string.h>
|
|
#include <string.h>
|
|
|
|
|
+#include <fstream>
|
|
|
|
|
+#include <assert.h>
|
|
|
|
|
|
|
|
#include "Audio.h"
|
|
#include "Audio.h"
|
|
|
|
|
+#include "precomp.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
using namespace std;
|
|
|
|
|
|
|
|
|
|
+// see http://soundfile.sapp.org/doc/WaveFormat/
|
|
|
|
|
+// Note: We assume little endian here
|
|
|
|
|
// In-memory image of a canonical 44-byte RIFF/WAVE header followed by the
// header of the first sub-chunk after "fmt ". The members are declared in
// file order so the struct can be filled by one raw read()/memcpy(); the tag
// constants below are written for a little-endian host.
struct WaveHeader {
  // Returns true only when the header describes the single format this code
  // handles: RIFF/WAVE, a 16-byte PCM "fmt " chunk, one channel and 16 bits
  // per sample, with byte_rate and block_align consistent with those values.
  // The reason for a rejection is printed to stdout.
  bool Validate() const {
    // F F I R
    if (chunk_id != 0x46464952) {
      printf("Expected chunk_id RIFF. Given: 0x%08x\n",
             static_cast<uint32_t>(chunk_id));
      return false;
    }
    // E V A W
    if (format != 0x45564157) {
      printf("Expected format WAVE. Given: 0x%08x\n",
             static_cast<uint32_t>(format));
      return false;
    }

    // "fmt " (with the trailing space)
    if (subchunk1_id != 0x20746d66) {
      printf("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
             static_cast<uint32_t>(subchunk1_id));
      return false;
    }

    if (subchunk1_size != 16) {  // 16 for PCM
      printf("Expected subchunk1_size 16. Given: %d\n", subchunk1_size);
      return false;
    }

    if (audio_format != 1) {  // 1 for PCM
      printf("Expected audio_format 1. Given: %d\n", audio_format);
      return false;
    }

    if (num_channels != 1) {  // we support only single channel for now
      printf("Expected single channel. Given: %d\n", num_channels);
      return false;
    }

    // sample_rate has not been range-checked at this point, so the products
    // are formed in 64 bits to rule out signed 32-bit overflow.
    const int64_t expected_byte_rate =
        static_cast<int64_t>(sample_rate) * num_channels * bits_per_sample / 8;
    if (byte_rate != expected_byte_rate) {
      printf("Expected byte_rate %lld. Given: %d\n",
             static_cast<long long>(expected_byte_rate), byte_rate);
      return false;
    }

    const int64_t expected_block_align =
        static_cast<int64_t>(num_channels) * bits_per_sample / 8;
    if (block_align != expected_block_align) {
      printf("Expected block_align %lld. Given: %d\n",
             static_cast<long long>(expected_block_align), block_align);
      return false;
    }

    if (bits_per_sample != 16) {  // we support only 16 bits per sample
      printf("Expected bits_per_sample 16. Given: %d\n", bits_per_sample);
      return false;
    }
    return true;
  }

  // Skips sub-chunks until subchunk2_id is "data".
  //
  // `is` must be positioned just after the 8-byte header of the chunk that
  // subchunk2_id/subchunk2_size currently describe. Each non-"data" chunk is
  // skipped by seeking over its payload and reading the next chunk header
  // into subchunk2_id/subchunk2_size. On success the stream is left at the
  // first byte of the "data" payload.
  //
  // The function returns nothing: callers must test `is` afterwards. The
  // stream is in a failed state when no "data" chunk was found or when a
  // chunk declared a negative size.
  //
  // See https://en.wikipedia.org/wiki/WAV#Metadata and
  // https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
  //
  // NOTE(review): RIFF pads odd-sized chunks to an even boundary; that pad
  // byte is not skipped here - confirm whether such files must be supported.
  void SeekToDataChunk(std::istream &is) {
    // a t a d
    while (is && subchunk2_id != 0x61746164) {
      if (subchunk2_size < 0) {
        // A negative size would seek backwards; a size of -8 lands on this
        // very chunk header again and would loop forever.
        is.setstate(std::ios_base::failbit);
        return;
      }
      is.seekg(subchunk2_size, std::istream::cur);
      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
    }
  }

  int32_t chunk_id;         // "RIFF"
  int32_t chunk_size;       // not inspected by Validate()
  int32_t format;           // "WAVE"
  int32_t subchunk1_id;     // "fmt "
  int32_t subchunk1_size;   // 16 for PCM
  int16_t audio_format;     // 1 for PCM
  int16_t num_channels;     // only 1 is accepted
  int32_t sample_rate;
  int32_t byte_rate;        // sample_rate * num_channels * bits_per_sample / 8
  int16_t block_align;      // num_channels * bits_per_sample / 8
  int16_t bits_per_sample;  // only 16 is accepted
  int32_t subchunk2_id;     // a tag of this chunk
  int32_t subchunk2_size;   // size of subchunk2
};
|
|
|
|
|
+static_assert(sizeof(WaveHeader) == WAV_HEADER_SIZE, "");
|
|
|
|
|
+
|
|
|
class AudioWindow {
|
|
class AudioWindow {
|
|
|
private:
|
|
private:
|
|
|
int *window;
|
|
int *window;
|
|
@@ -56,7 +141,7 @@ int AudioFrame::set_end(int val, int max_len)
|
|
|
float frame_length = 400;
|
|
float frame_length = 400;
|
|
|
float frame_shift = 160;
|
|
float frame_shift = 160;
|
|
|
float num_new_samples =
|
|
float num_new_samples =
|
|
|
- ceil((num_samples - 400) / frame_shift) * frame_shift + frame_length;
|
|
|
|
|
|
|
+ ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;
|
|
|
|
|
|
|
|
end = start + num_new_samples;
|
|
end = start + num_new_samples;
|
|
|
len = (int)num_new_samples;
|
|
len = (int)num_new_samples;
|
|
@@ -111,120 +196,150 @@ Audio::~Audio()
|
|
|
|
|
|
|
|
void Audio::disp()
|
|
void Audio::disp()
|
|
|
{
|
|
{
|
|
|
- printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
|
|
|
|
|
|
|
+ printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
|
|
|
speech_len);
|
|
speech_len);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
float Audio::get_time_len()
|
|
float Audio::get_time_len()
|
|
|
{
|
|
{
|
|
|
- return (float)speech_len / 16000;
|
|
|
|
|
- //speech_len);
|
|
|
|
|
|
|
+ return (float)speech_len / model_sample_rate;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-bool Audio::loadwav(const char *filename)
|
|
|
|
|
|
|
+void Audio::wavResample(int32_t sampling_rate, const float *waveform,
|
|
|
|
|
+ int32_t n)
|
|
|
{
|
|
{
|
|
|
|
|
+ printf(
|
|
|
|
|
+ "Creating a resampler:\n"
|
|
|
|
|
+ " in_sample_rate: %d\n"
|
|
|
|
|
+ " output_sample_rate: %d\n",
|
|
|
|
|
+ sampling_rate, static_cast<int32_t>(model_sample_rate));
|
|
|
|
|
+ float min_freq =
|
|
|
|
|
+ std::min<int32_t>(sampling_rate, model_sample_rate);
|
|
|
|
|
+ float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
|
|
|
|
+
|
|
|
|
|
+ int32_t lowpass_filter_width = 6;
|
|
|
|
|
+ //FIXME
|
|
|
|
|
+ //auto resampler = new LinearResample(
|
|
|
|
|
+ // sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
|
|
|
|
|
+ auto resampler = std::make_unique<LinearResample>(
|
|
|
|
|
+ sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
|
|
|
|
|
+ std::vector<float> samples;
|
|
|
|
|
+ resampler->Resample(waveform, n, true, &samples);
|
|
|
|
|
+ //reset speech_data
|
|
|
|
|
+ speech_len = samples.size();
|
|
|
|
|
+ if (speech_data != NULL) {
|
|
|
|
|
+ free(speech_data);
|
|
|
|
|
+ }
|
|
|
|
|
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
|
|
|
|
|
+ memset(speech_data, 0, sizeof(float) * speech_len);
|
|
|
|
|
+ copy(samples.begin(), samples.end(), speech_data);
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
+bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
|
|
|
|
|
+{
|
|
|
|
|
+ WaveHeader header;
|
|
|
if (speech_data != NULL) {
|
|
if (speech_data != NULL) {
|
|
|
free(speech_data);
|
|
free(speech_data);
|
|
|
}
|
|
}
|
|
|
if (speech_buff != NULL) {
|
|
if (speech_buff != NULL) {
|
|
|
free(speech_buff);
|
|
free(speech_buff);
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
offset = 0;
|
|
offset = 0;
|
|
|
-
|
|
|
|
|
- FILE *fp;
|
|
|
|
|
- fp = fopen(filename, "rb");
|
|
|
|
|
- if (fp == nullptr)
|
|
|
|
|
|
|
+ std::ifstream is(filename, std::ifstream::binary);
|
|
|
|
|
+ is.read(reinterpret_cast<char *>(&header), sizeof(header));
|
|
|
|
|
+ if(!is){
|
|
|
|
|
+ fprintf(stderr, "Failed to read %s\n", filename);
|
|
|
return false;
|
|
return false;
|
|
|
- fseek(fp, 0, SEEK_END); /*定位到文件末尾*/
|
|
|
|
|
- uint32_t nFileLen = ftell(fp); /*得到文件大小*/
|
|
|
|
|
- fseek(fp, 44, SEEK_SET); /*跳过wav文件头*/
|
|
|
|
|
-
|
|
|
|
|
- speech_len = (nFileLen - 44) / 2;
|
|
|
|
|
- speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
|
|
|
|
|
- speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ *sampling_rate = header.sample_rate;
|
|
|
|
|
+ // header.subchunk2_size contains the number of bytes in the data.
|
|
|
|
|
+ // As we assume each sample contains two bytes, so it is divided by 2 here
|
|
|
|
|
+ speech_len = header.subchunk2_size / 2;
|
|
|
|
|
+ speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
|
|
|
|
|
|
|
|
if (speech_buff)
|
|
if (speech_buff)
|
|
|
{
|
|
{
|
|
|
- memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
|
|
|
|
|
- int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
|
|
|
|
|
- fclose(fp);
|
|
|
|
|
|
|
+ memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
|
|
|
|
+ is.read(reinterpret_cast<char *>(speech_buff), header.subchunk2_size);
|
|
|
|
|
+ if (!is) {
|
|
|
|
|
+ fprintf(stderr, "Failed to read %s\n", filename);
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
|
|
|
|
|
+ memset(speech_data, 0, sizeof(float) * speech_len);
|
|
|
|
|
|
|
|
- speech_data = (float*)malloc(sizeof(float) * speech_align_len);
|
|
|
|
|
- memset(speech_data, 0, sizeof(float) * speech_align_len);
|
|
|
|
|
- int i;
|
|
|
|
|
float scale = 1;
|
|
float scale = 1;
|
|
|
-
|
|
|
|
|
if (data_type == 1) {
|
|
if (data_type == 1) {
|
|
|
scale = 32768;
|
|
scale = 32768;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- for (i = 0; i < speech_len; i++) {
|
|
|
|
|
|
|
+ for (int32_t i = 0; i != speech_len; ++i) {
|
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ //resample
|
|
|
|
|
+ if(*sampling_rate != model_sample_rate){
|
|
|
|
|
+ wavResample(*sampling_rate, speech_data, speech_len);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
|
frame_queue.push(frame);
|
|
frame_queue.push(frame);
|
|
|
|
|
|
|
|
-
|
|
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
else
|
|
else
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-bool Audio::loadwav(const char* buf, int nFileLen)
|
|
|
|
|
|
|
+bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
|
|
|
{
|
|
{
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
|
|
+ WaveHeader header;
|
|
|
if (speech_data != NULL) {
|
|
if (speech_data != NULL) {
|
|
|
free(speech_data);
|
|
free(speech_data);
|
|
|
}
|
|
}
|
|
|
if (speech_buff != NULL) {
|
|
if (speech_buff != NULL) {
|
|
|
free(speech_buff);
|
|
free(speech_buff);
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
offset = 0;
|
|
offset = 0;
|
|
|
|
|
|
|
|
- size_t nOffset = 0;
|
|
|
|
|
|
|
+ std::memcpy(&header, buf, sizeof(header));
|
|
|
|
|
|
|
|
-#define WAV_HEADER_SIZE 44
|
|
|
|
|
-
|
|
|
|
|
- speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
|
|
|
|
|
- speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
|
|
|
|
|
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ *sampling_rate = header.sample_rate;
|
|
|
|
|
+ speech_len = header.subchunk2_size / 2;
|
|
|
|
|
+ speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
|
|
|
if (speech_buff)
|
|
if (speech_buff)
|
|
|
{
|
|
{
|
|
|
- memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
|
|
memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
|
|
memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
|
|
|
|
|
|
|
|
|
|
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
|
|
|
|
|
+ memset(speech_data, 0, sizeof(float) * speech_len);
|
|
|
|
|
|
|
|
- speech_data = (float*)malloc(sizeof(float) * speech_align_len);
|
|
|
|
|
- memset(speech_data, 0, sizeof(float) * speech_align_len);
|
|
|
|
|
- int i;
|
|
|
|
|
float scale = 1;
|
|
float scale = 1;
|
|
|
-
|
|
|
|
|
if (data_type == 1) {
|
|
if (data_type == 1) {
|
|
|
scale = 32768;
|
|
scale = 32768;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- for (i = 0; i < speech_len; i++) {
|
|
|
|
|
|
|
+ for (int32_t i = 0; i != speech_len; ++i) {
|
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ //resample
|
|
|
|
|
+ if(*sampling_rate != model_sample_rate){
|
|
|
|
|
+ wavResample(*sampling_rate, speech_data, speech_len);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
+ AudioFrame* frame = new AudioFrame(speech_len);
|
|
|
|
|
+ frame_queue.push(frame);
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
else
|
|
else
|
|
|
return false;
|
|
return false;
|
|
|
-
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-bool Audio::loadpcmwav(const char* buf, int nBufLen)
|
|
|
|
|
|
|
+bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
|
|
|
{
|
|
{
|
|
|
if (speech_data != NULL) {
|
|
if (speech_data != NULL) {
|
|
|
free(speech_data);
|
|
free(speech_data);
|
|
@@ -234,33 +349,29 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen)
|
|
|
}
|
|
}
|
|
|
offset = 0;
|
|
offset = 0;
|
|
|
|
|
|
|
|
- size_t nOffset = 0;
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
speech_len = nBufLen / 2;
|
|
speech_len = nBufLen / 2;
|
|
|
- speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
|
|
|
|
|
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
|
|
if (speech_buff)
|
|
if (speech_buff)
|
|
|
{
|
|
{
|
|
|
- memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
|
|
memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
|
|
memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
|
|
|
|
|
|
|
|
|
|
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
|
|
|
|
|
+ memset(speech_data, 0, sizeof(float) * speech_len);
|
|
|
|
|
|
|
|
- speech_data = (float*)malloc(sizeof(float) * speech_align_len);
|
|
|
|
|
- memset(speech_data, 0, sizeof(float) * speech_align_len);
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- int i;
|
|
|
|
|
float scale = 1;
|
|
float scale = 1;
|
|
|
-
|
|
|
|
|
if (data_type == 1) {
|
|
if (data_type == 1) {
|
|
|
scale = 32768;
|
|
scale = 32768;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- for (i = 0; i < speech_len; i++) {
|
|
|
|
|
|
|
+ for (int32_t i = 0; i != speech_len; ++i) {
|
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ //resample
|
|
|
|
|
+ if(*sampling_rate != model_sample_rate){
|
|
|
|
|
+ wavResample(*sampling_rate, speech_data, speech_len);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
|
frame_queue.push(frame);
|
|
frame_queue.push(frame);
|
|
@@ -269,13 +380,10 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen)
|
|
|
}
|
|
}
|
|
|
else
|
|
else
|
|
|
return false;
|
|
return false;
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-bool Audio::loadpcmwav(const char* filename)
|
|
|
|
|
|
|
+bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
|
|
|
{
|
|
{
|
|
|
-
|
|
|
|
|
if (speech_data != NULL) {
|
|
if (speech_data != NULL) {
|
|
|
free(speech_data);
|
|
free(speech_data);
|
|
|
}
|
|
}
|
|
@@ -293,34 +401,31 @@ bool Audio::loadpcmwav(const char* filename)
|
|
|
fseek(fp, 0, SEEK_SET);
|
|
fseek(fp, 0, SEEK_SET);
|
|
|
|
|
|
|
|
speech_len = (nFileLen) / 2;
|
|
speech_len = (nFileLen) / 2;
|
|
|
- speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
|
|
|
|
|
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
|
|
if (speech_buff)
|
|
if (speech_buff)
|
|
|
{
|
|
{
|
|
|
- memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
|
|
|
|
|
|
|
+ memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
|
|
int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
|
|
int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
|
|
|
fclose(fp);
|
|
fclose(fp);
|
|
|
|
|
|
|
|
- speech_data = (float*)malloc(sizeof(float) * speech_align_len);
|
|
|
|
|
- memset(speech_data, 0, sizeof(float) * speech_align_len);
|
|
|
|
|
-
|
|
|
|
|
|
|
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
|
|
|
|
|
+ memset(speech_data, 0, sizeof(float) * speech_len);
|
|
|
|
|
|
|
|
-
|
|
|
|
|
- int i;
|
|
|
|
|
float scale = 1;
|
|
float scale = 1;
|
|
|
-
|
|
|
|
|
if (data_type == 1) {
|
|
if (data_type == 1) {
|
|
|
scale = 32768;
|
|
scale = 32768;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- for (i = 0; i < speech_len; i++) {
|
|
|
|
|
|
|
+ for (int32_t i = 0; i != speech_len; ++i) {
|
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
speech_data[i] = (float)speech_buff[i] / scale;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ //resample
|
|
|
|
|
+ if(*sampling_rate != model_sample_rate){
|
|
|
|
|
+ wavResample(*sampling_rate, speech_data, speech_len);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
AudioFrame* frame = new AudioFrame(speech_len);
|
|
|
frame_queue.push(frame);
|
|
frame_queue.push(frame);
|
|
|
-
|
|
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
@@ -329,7 +434,6 @@ bool Audio::loadpcmwav(const char* filename)
|
|
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
int Audio::fetch_chunck(float *&dout, int len)
|
|
int Audio::fetch_chunck(float *&dout, int len)
|
|
|
{
|
|
{
|
|
|
if (offset >= speech_align_len) {
|
|
if (offset >= speech_align_len) {
|