# load_pretrained_model.py

import collections
import logging
from io import BytesIO
from typing import Dict, Optional, Union

import torch
import torch.nn

def filter_state_dict(
    dst_state: Dict[str, Union[float, torch.Tensor]],
    src_state: Dict[str, Union[float, torch.Tensor]],
):
    """Filter out entries whose name or size does not match between two state dicts.

    Args:
        dst_state: reference state dict (the model being initialized)
        src_state: state dict to be filtered (the pretrained checkpoint)
    """
    match_state = {}
    for key, value in src_state.items():
        if key in dst_state and dst_state[key].size() == src_state[key].size():
            match_state[key] = value
        elif key not in dst_state:
            logging.warning(
                f"Filter out {key} from pretrained dict"
                " because its name is not found in the target dict"
            )
        else:
            logging.warning(
                f"Filter out {key} from pretrained dict"
                " because of size mismatch"
                f" ({dst_state[key].size()} vs {src_state[key].size()})"
            )
    return match_state
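
# Usage sketch (illustrative only; the checkpoint path is an assumption, not
# part of this module): keep only the checkpoint entries whose name and shape
# match the model, then load the merged dict strictly.
#
#     dst_state = model.state_dict()
#     src_state = torch.load("ckpt.pt", map_location="cpu")
#     src_state = filter_state_dict(dst_state, src_state)
#     dst_state.update(src_state)
#     model.load_state_dict(dst_state, strict=True)
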

def assignment_scope_map(dst_state: dict, src_state: dict, scope_map: Optional[str] = None):
    """Compute the union of the current variables and checkpoint variables.

    Args:
        dst_state: state dict of the current model
        src_state: state dict of the pretrained checkpoint
        scope_map: comma-separated "src_scope,dst_scope" pairs used to rename
            checkpoint key prefixes to model key prefixes
    """
    # current model variables
    name_to_variable = collections.OrderedDict()
    for name, var in dst_state.items():
        name_to_variable[name] = var

    scope_map_num = 0
    if scope_map is not None:
        scope_map = scope_map.split(",")
        scope_map_num = len(scope_map) // 2
        for scope_map_idx in range(scope_map_num):
            scope_map_id = scope_map_idx * 2
            logging.info(
                "assignment_map from scope {} to {}".format(
                    scope_map[scope_map_id], scope_map[scope_map_id + 1]
                )
            )

    assignment_map = {}
    for name, var in src_state.items():
        if scope_map:
            for scope_map_idx in range(scope_map_num):
                scope_map_id = scope_map_idx * 2
                try:
                    idx = name.index(scope_map[scope_map_id])
                    new_name = scope_map[scope_map_id + 1] + name[idx + len(scope_map[scope_map_id]):]
                    if new_name in name_to_variable:
                        # store under the remapped name so it matches the model's keys
                        assignment_map[new_name] = var
                except ValueError:
                    # this scope prefix does not occur in the checkpoint key
                    continue
        else:
            if name in name_to_variable:
                assignment_map[name] = var
    return assignment_map
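
# Usage sketch (illustrative; the scope names are assumptions): rename
# checkpoint keys under "encoder." to the model's "model.encoder." prefix
# before merging. scope_map is parsed as comma-separated
# (src_scope, dst_scope) pairs.
#
#     src_state = assignment_scope_map(
#         dst_state, src_state, scope_map="encoder.,model.encoder."
#     )
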

def load_pretrained_model(
    path: str,
    model: torch.nn.Module,
    ignore_init_mismatch: bool,
    map_location: str = "cpu",
    oss_bucket=None,
    scope_map=None,
    excludes=None,
):
    """Load a pretrained state dict and set it on the model.

    The caller typically parses an init_param string of the form
    <file_path>:<src_key>:<dst_key>:<exclude_Keys> into these arguments.

    Args:
        path: checkpoint path (a local file, or an OSS key when oss_bucket is given)
        model: target model to initialize
        ignore_init_mismatch: drop mismatched entries (unused in the current implementation)
        map_location: device mapping passed to torch.load
        oss_bucket: optional OSS bucket object to read the checkpoint from
        scope_map: optional scope renaming (unused in the current implementation)
        excludes: optional key prefixes to exclude (unused in the current implementation)
    """
    obj = model
    dst_state = obj.state_dict()
    logging.info(f"ckpt: {path}")
    if oss_bucket is None:
        src_state = torch.load(path, map_location=map_location)
    else:
        buffer = BytesIO(oss_bucket.get_object(path).read())
        src_state = torch.load(buffer, map_location=map_location)
    if "state_dict" in src_state:
        src_state = src_state["state_dict"]

    for k in dst_state.keys():
        # checkpoints saved from DistributedDataParallel prefix keys with "module."
        if not k.startswith("module.") and "module." + k in src_state.keys():
            k_ddp = "module." + k
        else:
            k_ddp = k
        if k_ddp in src_state:
            dst_state[k] = src_state[k_ddp]
        else:
            logging.warning(f"Missing key in ckpt: model: {k}, ckpt: {k_ddp}")
    flag = obj.load_state_dict(dst_state, strict=True)
    logging.info(flag)
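
# Usage sketch (illustrative; the model class, checkpoint path, and `bucket`
# object are assumptions): load a local checkpoint, or one stored in OSS via
# a bucket exposing get_object(path) with a file-like result, as used above.
#
#     model = MyModel()
#     load_pretrained_model("exp/model.pt", model, ignore_init_mismatch=True)
#     load_pretrained_model(
#         "exp/model.pt", model, ignore_init_mismatch=True, oss_bucket=bucket
#     )
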

# Previous implementation, kept commented out for reference; unlike the
# version above, it applied `excludes`, `scope_map`, and
# `ignore_init_mismatch` instead of ignoring them.
#
# def load_pretrained_model(
#     path: str,
#     model: torch.nn.Module,
#     ignore_init_mismatch: bool,
#     map_location: str = "cpu",
#     oss_bucket=None,
#     scope_map=None,
#     excludes=None,
# ):
#     """Load a model state and set it to the model.
#
#     Args:
#         init_param: <file_path>:<src_key>:<dst_key>:<exclude_Keys>
#     """
#     obj = model
#
#     if oss_bucket is None:
#         src_state = torch.load(path, map_location=map_location)
#     else:
#         buffer = BytesIO(oss_bucket.get_object(path).read())
#         src_state = torch.load(buffer, map_location=map_location)
#     src_state = src_state["model"] if "model" in src_state else src_state
#
#     if excludes is not None:
#         for e in excludes.split(","):
#             src_state = {k: v for k, v in src_state.items() if not k.startswith(e)}
#
#     dst_state = obj.state_dict()
#     src_state = assignment_scope_map(dst_state, src_state, scope_map)
#
#     if ignore_init_mismatch:
#         src_state = filter_state_dict(dst_state, src_state)
#
#     logging.debug("Loaded src_state keys: {}".format(src_state.keys()))
#     logging.debug("Loaded dst_state keys: {}".format(dst_state.keys()))
#     dst_state.update(src_state)
#     obj.load_state_dict(dst_state, strict=True)