build_dataloader.py 1.4 KB

from funasr.datasets.large_datasets.build_dataloader import LargeDataLoader
from funasr.datasets.small_datasets.sequence_iter_factory import SequenceIterFactory


def build_dataloader(args):
    if args.dataset_type == "small":
        if args.task_name == "diar" and args.model == "eend_ola":
            from funasr.modules.eend_ola.eend_ola_dataloader import EENDOLADataLoader
            train_iter_factory = EENDOLADataLoader(
                data_file=args.train_data_path_and_name_and_type[0][0],
                batch_size=args.dataset_conf["batch_conf"]["batch_size"],
                num_workers=args.dataset_conf["num_workers"],
                shuffle=True)
            valid_iter_factory = EENDOLADataLoader(
                data_file=args.valid_data_path_and_name_and_type[0][0],
                batch_size=args.dataset_conf["batch_conf"]["batch_size"],
                num_workers=0,
                shuffle=False)
        else:
            train_iter_factory = SequenceIterFactory(args, mode="train")
            valid_iter_factory = SequenceIterFactory(args, mode="valid")
    elif args.dataset_type == "large":
        train_iter_factory = LargeDataLoader(args, mode="train")
        valid_iter_factory = LargeDataLoader(args, mode="valid")
    else:
        raise ValueError(f"Not supported dataset_type={args.dataset_type}")
    return train_iter_factory, valid_iter_factory
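
A minimal sketch of how a training entry point might call build_dataloader. The attribute names match the ones read by the function above; the concrete values (model name, file paths, batch size, worker count) are illustrative assumptions rather than FunASR defaults, and in a real run the selected iterator factory will read additional configuration from args.

from types import SimpleNamespace

# Hypothetical configuration for the EEND-OLA diarization branch; values are placeholders.
args = SimpleNamespace(
    dataset_type="small",
    task_name="diar",
    model="eend_ola",
    train_data_path_and_name_and_type=[("data/train/feats.scp", "speech", "kaldi_ark")],
    valid_data_path_and_name_and_type=[("data/valid/feats.scp", "speech", "kaldi_ark")],
    dataset_conf={"batch_conf": {"batch_size": 32}, "num_workers": 4},
)

train_iter_factory, valid_iter_factory = build_dataloader(args)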