test_rtf_gpu.py (2.3 KB)
  1. import time
  2. import sys
  3. import librosa
  4. from funasr.utils.types import str2bool
  5. import argparse
  6. parser = argparse.ArgumentParser()
  7. parser.add_argument('--model_dir', type=str, required=True)
  8. parser.add_argument('--backend', type=str, default='onnx', help='["onnx", "torch"]')
  9. parser.add_argument('--wav_file', type=str, default=None, help='amp fallback number')
  10. parser.add_argument('--quantize', type=str2bool, default=False, help='quantized model')
  11. parser.add_argument('--intra_op_num_threads', type=int, default=1, help='intra_op_num_threads for onnx')
  12. parser.add_argument('--batch_size', type=int, default=1, help='batch_size for onnx')
  13. args = parser.parse_args()
  14. from funasr.runtime.python.libtorch.funasr_torch import Paraformer
  15. if args.backend == "onnx":
  16. from funasr.runtime.python.onnxruntime.funasr_onnx import Paraformer
  17. if 'blade' in args.model_dir:
  18. import torch_blade
  19. model = Paraformer(args.model_dir, batch_size=args.batch_size, quantize=args.quantize, intra_op_num_threads=args.intra_op_num_threads)
  20. wav_file_f = open(args.wav_file, 'r')
  21. wav_files = wav_file_f.readlines()
  22. # warm-up
  23. total = 0.0
  24. num = 100
  25. wav_path = []
  26. for i in range(num * args.batch_size):
  27. wav_path_i = wav_files[i % len(wav_files)]
  28. wav_path_i = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  29. wav_path += [wav_path_i]
  30. beg_time = time.time()
  31. result = model(wav_path)
  32. end_time = time.time()
  33. duration = end_time-beg_time
  34. print(result)
  35. print("num: {}, time, {}, avg: {}".format(len(wav_path), duration, total/(i+1)))
  36. # infer time
  37. wav_path = []
  38. beg_time = time.time()
  39. for i, wav_path_i in enumerate(wav_files):
  40. wav_path_i = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  41. wav_path += [wav_path_i]
  42. result = model(wav_path)
  43. end_time = time.time()
  44. duration = (end_time-beg_time)*1000
  45. print("total_time_comput_ms: {}".format(int(duration)))
  46. duration_time = 0.0
  47. for i, wav_path_i in enumerate(wav_files):
  48. wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  49. waveform, _ = librosa.load(wav_path, sr=16000)
  50. duration_time += len(waveform)/16.0
  51. print("total_time_wav_ms: {}".format(int(duration_time)))
  52. print("total_rtf: {:.5}".format(duration/duration_time))