test_rtf.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import time
  2. import sys
  3. import librosa
  4. import argparse
  5. parser = argparse.ArgumentParser()
  6. parser.add_argument('--model_dir', type=str, required=True)
  7. parser.add_argument('--backend', type=str, default='onnx', help='["onnx", "torch"]')
  8. parser.add_argument('--wav_file', type=str, default=None, help='amp fallback number')
  9. parser.add_argument('--quantize', type=bool, default=False, help='quantized model')
  10. parser.add_argument('--intra_op_num_threads', type=int, default=1, help='intra_op_num_threads for onnx')
  11. args = parser.parse_args()
  12. from funasr.runtime.python.libtorch.torch_paraformer import Paraformer
  13. if args.backend == "onnx":
  14. from funasr.runtime.python.onnxruntime.rapid_paraformer import Paraformer
  15. model = Paraformer(args.model_dir, batch_size=1, quantize=args.quantize, intra_op_num_threads=args.intra_op_num_threads)
  16. wav_file_f = open(args.wav_file, 'r')
  17. wav_files = wav_file_f.readlines()
  18. # warm-up
  19. total = 0.0
  20. num = 30
  21. wav_path = wav_files[0].split("\t")[1].strip() if "\t" in wav_files[0] else wav_files[0].split(" ")[1].strip()
  22. for i in range(num):
  23. beg_time = time.time()
  24. result = model(wav_path)
  25. end_time = time.time()
  26. duration = end_time-beg_time
  27. total += duration
  28. print(result)
  29. print("num: {}, time, {}, avg: {}, rtf: {}".format(len(wav_path), duration, total/(i+1), (total/(i+1))/5.53))
  30. # infer time
  31. beg_time = time.time()
  32. for i, wav_path_i in enumerate(wav_files):
  33. wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  34. result = model(wav_path)
  35. end_time = time.time()
  36. duration = (end_time-beg_time)*1000
  37. print("total_time_comput_ms: {}".format(int(duration)))
  38. duration_time = 0.0
  39. for i, wav_path_i in enumerate(wav_files):
  40. wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  41. waveform, _ = librosa.load(wav_path, sr=16000)
  42. duration_time += len(waveform)/16.0
  43. print("total_time_wav_ms: {}".format(int(duration_time)))
  44. print("total_rtf: {:.5}".format(duration/duration_time))