|
|
@@ -658,6 +658,10 @@ def inference_paraformer_vad_punc(
|
|
|
sorted_data = sorted(data_with_index, key=lambda x: x[0][1] - x[0][0])
|
|
|
results_sorted = []
|
|
|
batch_size_token_ms = batch_size_token*60
|
|
|
+ if speech2text.device == "cpu":
|
|
|
+ batch_size_token_ms = 0
|
|
|
+ batch_size_token_ms = max(batch_size_token_ms, sorted_data[0][0][1] - sorted_data[0][0][0])
|
|
|
+
|
|
|
batch_size_token_ms_cum = 0
|
|
|
beg_idx = 0
|
|
|
for j, _ in enumerate(range(0, n)):
|