resample.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. /**
  2. * Copyright 2013 Pegah Ghahremani
  3. * 2014 IMSL, PKU-HKUST (author: Wei Shi)
  4. * 2014 Yanqing Sun, Junjie Wang
  5. * 2014 Johns Hopkins University (author: Daniel Povey)
  6. * Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  7. *
  8. * See LICENSE for clarification regarding multiple authors
  9. *
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. */
  22. // this file is copied and modified from
  23. // kaldi/src/feat/resample.cc
  24. #include "resample.h"
  25. #include <assert.h>
  26. #include <math.h>
  27. #include <stdio.h>
  28. #include <cstdlib>
  29. #include <type_traits>
  30. #ifndef M_2PI
  31. #define M_2PI 6.283185307179586476925286766559005
  32. #endif
  33. #ifndef M_PI
  34. #define M_PI 3.1415926535897932384626433832795
  35. #endif
  36. template <class I>
  37. I Gcd(I m, I n) {
  38. // this function is copied from kaldi/src/base/kaldi-math.h
  39. if (m == 0 || n == 0) {
  40. if (m == 0 && n == 0) { // gcd not defined, as all integers are divisors.
  41. fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
  42. exit(-1);
  43. }
  44. return (m == 0 ? (n > 0 ? n : -n) : (m > 0 ? m : -m));
  45. // return absolute value of whichever is nonzero
  46. }
  47. // could use compile-time assertion
  48. // but involves messing with complex template stuff.
  49. static_assert(std::is_integral<I>::value, "");
  50. while (1) {
  51. m %= n;
  52. if (m == 0) return (n > 0 ? n : -n);
  53. n %= m;
  54. if (n == 0) return (m > 0 ? m : -m);
  55. }
  56. }
  57. /// Returns the least common multiple of two integers. Will
  58. /// crash unless the inputs are positive.
  59. template <class I>
  60. I Lcm(I m, I n) {
  61. // This function is copied from kaldi/src/base/kaldi-math.h
  62. assert(m > 0 && n > 0);
  63. I gcd = Gcd(m, n);
  64. return gcd * (m / gcd) * (n / gcd);
  65. }
  66. static float DotProduct(const float *a, const float *b, int32_t n) {
  67. float sum = 0;
  68. for (int32_t i = 0; i != n; ++i) {
  69. sum += a[i] * b[i];
  70. }
  71. return sum;
  72. }
  73. LinearResample::LinearResample(int32_t samp_rate_in_hz,
  74. int32_t samp_rate_out_hz, float filter_cutoff_hz,
  75. int32_t num_zeros)
  76. : samp_rate_in_(samp_rate_in_hz),
  77. samp_rate_out_(samp_rate_out_hz),
  78. filter_cutoff_(filter_cutoff_hz),
  79. num_zeros_(num_zeros) {
  80. assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
  81. filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
  82. filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);
  83. // base_freq is the frequency of the repeating unit, which is the gcd
  84. // of the input frequencies.
  85. int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_);
  86. input_samples_in_unit_ = samp_rate_in_ / base_freq;
  87. output_samples_in_unit_ = samp_rate_out_ / base_freq;
  88. SetIndexesAndWeights();
  89. Reset();
  90. }
  91. void LinearResample::SetIndexesAndWeights() {
  92. first_index_.resize(output_samples_in_unit_);
  93. weights_.resize(output_samples_in_unit_);
  94. double window_width = num_zeros_ / (2.0 * filter_cutoff_);
  95. for (int32_t i = 0; i < output_samples_in_unit_; i++) {
  96. double output_t = i / static_cast<double>(samp_rate_out_);
  97. double min_t = output_t - window_width, max_t = output_t + window_width;
  98. // we do ceil on the min and floor on the max, because if we did it
  99. // the other way around we would unnecessarily include indexes just
  100. // outside the window, with zero coefficients. It's possible
  101. // if the arguments to the ceil and floor expressions are integers
  102. // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
  103. // that we unnecessarily include something with a zero coefficient,
  104. // but this is only a slight efficiency issue.
  105. int32_t min_input_index = ceil(min_t * samp_rate_in_),
  106. max_input_index = floor(max_t * samp_rate_in_),
  107. num_indices = max_input_index - min_input_index + 1;
  108. first_index_[i] = min_input_index;
  109. weights_[i].resize(num_indices);
  110. for (int32_t j = 0; j < num_indices; j++) {
  111. int32_t input_index = min_input_index + j;
  112. double input_t = input_index / static_cast<double>(samp_rate_in_),
  113. delta_t = input_t - output_t;
  114. // sign of delta_t doesn't matter.
  115. weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_;
  116. }
  117. }
  118. }
  119. /** Here, t is a time in seconds representing an offset from
  120. the center of the windowed filter function, and FilterFunction(t)
  121. returns the windowed filter function, described
  122. in the header as h(t) = f(t)g(t), evaluated at t.
  123. */
  124. float LinearResample::FilterFunc(float t) const {
  125. float window, // raised-cosine (Hanning) window of width
  126. // num_zeros_/2*filter_cutoff_
  127. filter; // sinc filter function
  128. if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
  129. window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
  130. else
  131. window = 0.0; // outside support of window function
  132. if (t != 0)
  133. filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
  134. else
  135. filter = 2 * filter_cutoff_; // limit of the function at t = 0
  136. return filter * window;
  137. }
  138. void LinearResample::Reset() {
  139. input_sample_offset_ = 0;
  140. output_sample_offset_ = 0;
  141. input_remainder_.resize(0);
  142. }
  143. void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
  144. std::vector<float> *output) {
  145. int64_t tot_input_samp = input_sample_offset_ + input_dim,
  146. tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
  147. assert(tot_output_samp >= output_sample_offset_);
  148. output->resize(tot_output_samp - output_sample_offset_);
  149. // samp_out is the index into the total output signal, not just the part
  150. // of it we are producing here.
  151. for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
  152. samp_out++) {
  153. int64_t first_samp_in;
  154. int32_t samp_out_wrapped;
  155. GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
  156. const std::vector<float> &weights = weights_[samp_out_wrapped];
  157. // first_input_index is the first index into "input" that we have a weight
  158. // for.
  159. int32_t first_input_index =
  160. static_cast<int32_t>(first_samp_in - input_sample_offset_);
  161. float this_output;
  162. if (first_input_index >= 0 &&
  163. first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
  164. this_output =
  165. DotProduct(input + first_input_index, weights.data(), weights.size());
  166. } else { // Handle edge cases.
  167. this_output = 0.0;
  168. for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
  169. float weight = weights[i];
  170. int32_t input_index = first_input_index + i;
  171. if (input_index < 0 &&
  172. static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
  173. this_output +=
  174. weight * input_remainder_[input_remainder_.size() + input_index];
  175. } else if (input_index >= 0 && input_index < input_dim) {
  176. this_output += weight * input[input_index];
  177. } else if (input_index >= input_dim) {
  178. // We're past the end of the input and are adding zero; should only
  179. // happen if the user specified flush == true, or else we would not
  180. // be trying to output this sample.
  181. assert(flush);
  182. }
  183. }
  184. }
  185. int32_t output_index =
  186. static_cast<int32_t>(samp_out - output_sample_offset_);
  187. (*output)[output_index] = this_output;
  188. }
  189. if (flush) {
  190. Reset(); // Reset the internal state.
  191. } else {
  192. SetRemainder(input, input_dim);
  193. input_sample_offset_ = tot_input_samp;
  194. output_sample_offset_ = tot_output_samp;
  195. }
  196. }
  197. int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
  198. bool flush) const {
  199. // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
  200. // where tick_freq is the least common multiple of samp_rate_in_ and
  201. // samp_rate_out_.
  202. int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
  203. int32_t ticks_per_input_period = tick_freq / samp_rate_in_;
  204. // work out the number of ticks in the time interval
  205. // [ 0, input_num_samp/samp_rate_in_ ).
  206. int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period;
  207. if (!flush) {
  208. float window_width = num_zeros_ / (2.0 * filter_cutoff_);
  209. // To count the window-width in ticks we take the floor. This
  210. // is because since we're looking for the largest integer num-out-samp
  211. // that fits in the interval, which is open on the right, a reduction
  212. // in interval length of less than a tick will never make a difference.
  213. // For example, the largest integer in the interval [ 0, 2 ) and the
  214. // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
  215. // So when we're subtracting the window-width we can ignore the fractional
  216. // part.
  217. int32_t window_width_ticks = floor(window_width * tick_freq);
  218. // The time-period of the output that we can sample gets reduced
  219. // by the window-width (which is actually the distance from the
  220. // center to the edge of the windowing function) if we're not
  221. // "flushing the output".
  222. interval_length_in_ticks -= window_width_ticks;
  223. }
  224. if (interval_length_in_ticks <= 0) return 0;
  225. int32_t ticks_per_output_period = tick_freq / samp_rate_out_;
  226. // Get the last output-sample in the closed interval, i.e. replacing [ ) with
  227. // [ ]. Note: integer division rounds down. See
  228. // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
  229. // the notation.
  230. int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period;
  231. // We need the last output-sample in the open interval, so if it takes us to
  232. // the end of the interval exactly, subtract one.
  233. if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
  234. last_output_samp--;
  235. // First output-sample index is zero, so the number of output samples
  236. // is the last output-sample plus one.
  237. int64_t num_output_samp = last_output_samp + 1;
  238. return num_output_samp;
  239. }
  240. // inline
  241. void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
  242. int32_t *samp_out_wrapped) const {
  243. // A unit is the smallest nonzero amount of time that is an exact
  244. // multiple of the input and output sample periods. The unit index
  245. // is the answer to "which numbered unit we are in".
  246. int64_t unit_index = samp_out / output_samples_in_unit_;
  247. // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
  248. *samp_out_wrapped =
  249. static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
  250. *first_samp_in =
  251. first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_;
  252. }
  253. void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
  254. std::vector<float> old_remainder(input_remainder_);
  255. // max_remainder_needed is the width of the filter from side to side,
  256. // measured in input samples. you might think it should be half that,
  257. // but you have to consider that you might be wanting to output samples
  258. // that are "in the past" relative to the beginning of the latest
  259. // input... anyway, storing more remainder than needed is not harmful.
  260. int32_t max_remainder_needed =
  261. ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
  262. input_remainder_.resize(max_remainder_needed);
  263. for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
  264. index < 0; index++) {
  265. // we interpret "index" as an offset from the end of "input" and
  266. // from the end of input_remainder_.
  267. int32_t input_index = index + input_dim;
  268. if (input_index >= 0) {
  269. input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
  270. input[input_index];
  271. } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
  272. input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
  273. old_remainder[input_index +
  274. static_cast<int32_t>(old_remainder.size())];
  275. // else leave it at zero.
  276. }
  277. }
  278. }