# load_pretrained_model.py
  1. from typing import Any
  2. from typing import Dict
  3. from typing import Union
  4. from io import BytesIO
  5. import logging
  6. import torch
  7. import torch.nn
  8. import torch.optim
  9. import pdb
  10. def filter_state_dict(
  11. dst_state: Dict[str, Union[float, torch.Tensor]],
  12. src_state: Dict[str, Union[float, torch.Tensor]],
  13. ):
  14. """Filter name, size mismatch instances between dicts.
  15. Args:
  16. dst_state: reference state dict for filtering
  17. src_state: target state dict for filtering
  18. """
  19. match_state = {}
  20. for key, value in src_state.items():
  21. if key in dst_state and (dst_state[key].size() == src_state[key].size()):
  22. match_state[key] = value
  23. else:
  24. if key not in dst_state:
  25. logging.warning(
  26. f"Filter out {key} from pretrained dict"
  27. + " because of name not found in target dict"
  28. )
  29. else:
  30. logging.warning(
  31. f"Filter out {key} from pretrained dict"
  32. + " because of size mismatch"
  33. + f"({dst_state[key].size()}-{src_state[key].size()})"
  34. )
  35. return match_state
  36. def load_pretrained_model(
  37. path: str,
  38. model: torch.nn.Module,
  39. ignore_init_mismatch: bool=True,
  40. map_location: str = "cpu",
  41. oss_bucket=None,
  42. scope_map=[],
  43. excludes=None,
  44. ignore_mismatch=False,
  45. **kwargs,
  46. ):
  47. """Load a model state and set it to the model.
  48. Args:
  49. init_param: <file_path>:<src_key>:<dst_key>:<exclude_Keys>
  50. Examples:
  51. """
  52. obj = model
  53. dst_state = obj.state_dict()
  54. print(f"ckpt: {path}")
  55. if oss_bucket is None:
  56. src_state = torch.load(path, map_location=map_location)
  57. else:
  58. buffer = BytesIO(oss_bucket.get_object(path).read())
  59. src_state = torch.load(buffer, map_location=map_location)
  60. src_state = src_state["state_dict"] if "state_dict" in src_state else src_state
  61. src_state = src_state["model_state_dict"] if "model_state_dict" in src_state else src_state
  62. src_state = src_state["model"] if "model" in src_state else src_state
  63. if isinstance(scope_map, str):
  64. scope_map = scope_map.split(",")
  65. scope_map += ["module.", "None"]
  66. for k in dst_state.keys():
  67. k_src = k
  68. if scope_map is not None:
  69. src_prefix = ""
  70. dst_prefix = ""
  71. for i in range(0, len(scope_map), 2):
  72. src_prefix = scope_map[i] if scope_map[i].lower() != "none" else ""
  73. dst_prefix = scope_map[i+1] if scope_map[i+1].lower() != "none" else ""
  74. if dst_prefix == "" and (src_prefix + k) in src_state.keys():
  75. k_src = src_prefix + k
  76. if not k_src.startswith("module."):
  77. print(f"init param, map: {k} from {k_src} in ckpt")
  78. elif k.startswith(dst_prefix) and k.replace(dst_prefix, src_prefix, 1) in src_state.keys():
  79. k_src = k.replace(dst_prefix, src_prefix, 1)
  80. if not k_src.startswith("module."):
  81. print(f"init param, map: {k} from {k_src} in ckpt")
  82. if k_src in src_state.keys():
  83. if ignore_init_mismatch and dst_state[k].shape != src_state[k_src].shape:
  84. print(f"ignore_mismatch:{ignore_mismatch}, dst: {k, dst_state[k].shape}, src: {k_src, src_state[k_src].shape}")
  85. else:
  86. dst_state[k] = src_state[k_src]
  87. else:
  88. print(f"Warning, miss key in ckpt: {k}, mapped: {k_src}")
  89. flag = obj.load_state_dict(dst_state, strict=True)
  90. # print(flag)