biocoder_env_box.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. import json
  2. import os
  3. import re
  4. import sys
  5. from collections import defaultdict
  6. from dataclasses import dataclass
  7. from datasets import load_dataset
  8. from opendevin.core.config import load_app_config
  9. from opendevin.core.logger import opendevin_logger as logger
  10. from opendevin.runtime.docker.ssh_box import DockerSSHBox
  11. from opendevin.runtime.plugins import (
  12. JupyterRequirement,
  13. PluginRequirement,
  14. SWEAgentCommandsRequirement,
  15. )
# Application configuration, loaded once at import time. BiocoderSSHBox reads
# its workspace settings from this object, and get_box_for_instance temporarily
# overrides them while a sandbox is being constructed.
config = load_app_config()
# Container image passed to every sandbox created by
# BiocoderSSHBox.get_box_for_instance.
BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0'
  18. @dataclass
  19. class BiocoderData:
  20. filePath: str
  21. numLines: int
  22. lineStart: int
  23. lineEnd: int
  24. signature: str
  25. comment: str
  26. content: str
  27. repository: str
  28. promptSummaryOnly: str
  29. contextCode: str
  30. goldenCode: str
  31. test_case_id: str
  32. language: str
  33. def to_dict(self):
  34. return {
  35. 'filePath': self.filePath,
  36. 'numLines': self.numLines,
  37. 'lineStart': self.lineStart,
  38. 'lineEnd': self.lineEnd,
  39. 'signature': self.signature,
  40. 'comment': self.comment,
  41. 'content': self.content,
  42. 'repository': self.repository,
  43. 'promptSummaryOnly': self.promptSummaryOnly,
  44. 'contextCode': self.contextCode,
  45. 'goldenCode': self.goldenCode,
  46. 'test_case_id': self.test_case_id,
  47. 'language': self.language,
  48. }
  49. def get_likely_indent_size(array_of_tabs) -> int:
  50. sizes = defaultdict(int)
  51. for i in range(len(array_of_tabs) - 1):
  52. diff = array_of_tabs[i + 1] - array_of_tabs[i]
  53. if diff > 0:
  54. sizes[diff] += 1
  55. if len(sizes) == 0:
  56. return 4
  57. return int(max(sizes, key=sizes.get))
  58. class BiocoderSSHBox(DockerSSHBox):
  59. def __init__(
  60. self,
  61. container_image: str,
  62. timeout: int = 120,
  63. sid: str | None = None,
  64. biocoder_instance_id: str | None = None,
  65. biocoder_instance: BiocoderData | None = None,
  66. skip_workspace_mount: bool = True,
  67. sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
  68. biocoder_cache_folder: str = 'biocoder_cache',
  69. workspace_dir_name: str | None = None,
  70. ):
  71. if biocoder_instance_id is None:
  72. raise ValueError('biocoder_instance_id must be provided')
  73. self.biocoder_instance_id = biocoder_instance_id
  74. self.biocoder_instance = biocoder_instance
  75. self.skip_workspace_mount = skip_workspace_mount
  76. self.biocoder_cache_folder = biocoder_cache_folder
  77. self.first_line_after_removed = None
  78. self.workspace_dir_name = workspace_dir_name
  79. self.workspace_base = config.workspace_base
  80. self.workspace_mount_path = config.workspace_mount_path
  81. # self.workspace_dir_name_host = os.path.join(config.workspace_base, workspace_dir_name)
  82. self.context_path = None
  83. self.generated_path = None
  84. self.golden_path = None
  85. assert (
  86. container_image is not None
  87. ), 'container_image is required for BiocoderBenchSSHBox!'
  88. super().__init__(container_image, timeout, sid)
  89. self.init_plugins(sandbox_plugins)
  90. @property
  91. def volumes(self):
  92. if self.skip_workspace_mount:
  93. return {
  94. k: v
  95. for k, v in super().volumes.items()
  96. if not v['bind'] == self.sandbox_workspace_dir
  97. }
  98. return super().volumes
  99. def get_target_filepath(self):
  100. target_filepath = os.path.join(
  101. self.workspace_mount_path,
  102. self.biocoder_instance.repository.split('/')[1],
  103. self.biocoder_instance.filePath,
  104. )
  105. return target_filepath
  106. def get_changed_code(self, include_signature=False):
  107. # copies changed code into /testing_files/
  108. # Note that this does NOT copy the function signature
  109. target_filepath = self.get_target_filepath()
  110. selected_lines = []
  111. offset = 1 if include_signature else 0
  112. if self.first_line_after_removed is None:
  113. logger.warning('First line after removed is None')
  114. with open(target_filepath, 'r') as f:
  115. lines = f.read().split('\n')
  116. for i in range(self.biocoder_instance.lineStart - offset, len(lines)):
  117. if lines[i].strip() == self.first_line_after_removed.strip():
  118. break
  119. selected_lines.append(lines[i])
  120. text = '\n'.join(selected_lines)
  121. return text
  122. def copy_changed_code(self):
  123. changed_code = self.get_changed_code(include_signature=True)
  124. with open(self.generated_path, 'w') as f:
  125. f.write(changed_code)
  126. exit_code, output = self.execute_and_check(
  127. f'cp -r /workspace/{self.biocoder_cache_folder}/* /testing_files',
  128. 'Failed to copy the files',
  129. )
  130. def remove_code(self):
  131. comment_prefix = {'python': '#', 'java': '//'}
  132. target_filepath = self.get_target_filepath()
  133. line_start = self.biocoder_instance.lineStart
  134. line_end = self.biocoder_instance.lineEnd
  135. with open(target_filepath, 'r') as f:
  136. lines = f.read().split('\n')
  137. # print("="*10+"ORIGINAL"+"="*10)
  138. # print("\n".join(lines))
  139. signature_line = lines[line_start - 1]
  140. # get the number of tabs
  141. def get_indent_size(s: str):
  142. return len(re.match(r'\s*', s).group())
  143. indent_sizes = list(map(get_indent_size, lines))
  144. indent_size = get_likely_indent_size(indent_sizes)
  145. comment_indent_size = get_indent_size(signature_line) + indent_size
  146. lines = (
  147. lines[:line_start]
  148. + [
  149. f"{' '*comment_indent_size+comment_prefix[self.biocoder_instance.language.lower()]}TODO: replace with your code here"
  150. ]
  151. + ([''] * 2)
  152. + lines[line_end:]
  153. )
  154. first_line_after_removed_index = line_start
  155. while len(
  156. lines[first_line_after_removed_index].strip()
  157. ) == 0 and first_line_after_removed_index < len(lines):
  158. first_line_after_removed_index += 1
  159. self.first_line_after_removed = lines[first_line_after_removed_index]
  160. # print("FIRST LINE AFTER REMOVED: ", self.first_line_after_removed)
  161. with open(target_filepath, 'w') as f:
  162. f.write('\n'.join(lines))
  163. # with open(target_filepath, 'r') as f:
  164. # print("="*10+"MODIFIED"+"="*10)
  165. # print(f.read())
  166. def execute_and_check(self, cmd: str, error_msg: str) -> tuple[int, str]:
  167. exit_code, output = self.execute(cmd)
  168. if exit_code != 0:
  169. logger.error(error_msg)
  170. sys.exit(1)
  171. return exit_code, output
  172. @classmethod
  173. def get_box_for_instance(
  174. cls,
  175. instance,
  176. workspace_dir_name=None,
  177. skip_workspace_mount: bool = False,
  178. workspace_mount_path: str | None = None,
  179. sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
  180. ) -> 'BiocoderSSHBox':
  181. """This method initializes a container image, then runs some initialization commands"""
  182. if workspace_dir_name is None:
  183. workspace_dir_name = f'{instance.repository}__{instance.test_case_id[:10]}__{os.getpid()}'.replace(
  184. '/', '__'
  185. )
  186. workspace_base = str(os.path.join(config.workspace_base, workspace_dir_name))
  187. old_workspace_base = config.workspace_base
  188. old_workspace_mount_path = config.workspace_mount_path
  189. try:
  190. config.workspace_base = workspace_base
  191. config.workspace_mount_path = workspace_base
  192. # linting python after editing helps LLM fix indentations
  193. config.sandbox.enable_auto_lint = True
  194. # create folder for transferring files back/forth
  195. biocoder_cache_folder = 'biocoder_cache'
  196. if not os.path.exists(os.path.join(workspace_base, biocoder_cache_folder)):
  197. os.makedirs(
  198. os.path.join(workspace_base, biocoder_cache_folder), exist_ok=True
  199. )
  200. file_ext = {
  201. 'python': 'py',
  202. 'java': 'java',
  203. 'c': 'c',
  204. 'cpp': 'cpp',
  205. 'javascript': 'js',
  206. 'typescript': 'ts',
  207. }[instance.language.lower()]
  208. context_path = os.path.join(
  209. workspace_base, biocoder_cache_folder, 'context.' + file_ext
  210. )
  211. generated_path = os.path.join(
  212. workspace_base, biocoder_cache_folder, 'generated.' + file_ext
  213. )
  214. golden_path = os.path.join(
  215. workspace_base, biocoder_cache_folder, 'golden.' + file_ext
  216. )
  217. # print(instance.contextCode)
  218. with open(context_path, 'w') as f:
  219. f.write(instance.contextCode)
  220. with open(generated_path, 'w') as f:
  221. f.write(instance.goldenCode)
  222. with open(golden_path, 'w') as f:
  223. f.write(instance.goldenCode)
  224. testcase_json = {
  225. 'test_case_id': instance.test_case_id,
  226. 'num_cases': 1000,
  227. 'language': instance.language.lower(),
  228. }
  229. with open(
  230. os.path.join(
  231. workspace_base, biocoder_cache_folder, 'testcase_biocoder.json'
  232. ),
  233. 'w',
  234. ) as f:
  235. f.write(json.dumps(testcase_json, indent=4))
  236. # linting python after editing helps LLM fix indentations
  237. config.sandbox.enable_auto_lint = True
  238. sandbox = cls(
  239. container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
  240. biocoder_instance_id=instance.test_case_id,
  241. biocoder_instance=instance,
  242. skip_workspace_mount=skip_workspace_mount,
  243. sandbox_plugins=sandbox_plugins,
  244. biocoder_cache_folder=biocoder_cache_folder,
  245. workspace_dir_name=workspace_dir_name,
  246. )
  247. except Exception:
  248. raise
  249. finally:
  250. config.workspace_base = old_workspace_base
  251. config.workspace_mount_path = old_workspace_mount_path
  252. sandbox.context_path = context_path
  253. sandbox.generated_path = generated_path
  254. sandbox.golden_path = golden_path
  255. logger.info(f'SSH box started for instance {instance.test_case_id}.')
  256. # cd to the workspace
  257. exit_code, output = sandbox.execute_and_check(
  258. 'cd /workspace', 'Failed to cd to workspace'
  259. )
  260. logger.info(f'cd to workspace: {output}')
  261. # download repository archive
  262. repository_url = f"https://biocoder.lilbillbiscuit.com/repos/{instance.repository.split('/')[1]}.zip"
  263. exit_code, output = sandbox.execute_and_check(
  264. 'wget -O repo.zip ' + repository_url, 'Failed to download the repository'
  265. )
  266. logger.info(f'Downloaded the repository: {output}')
  267. exit_code, output = sandbox.execute_and_check(
  268. 'unzip -o -q repo.zip', 'Failed to unzip the repository'
  269. )
  270. logger.info(f'Unzipped the repository: {output}')
  271. # copy the context, generated and golden files to the /testing_files folder
  272. exit_code, output = sandbox.execute_and_check(
  273. f'cp -r /workspace/{biocoder_cache_folder}/* /testing_files',
  274. 'Failed to copy the files',
  275. )
  276. # chmod 777
  277. exit_code, output = sandbox.execute_and_check(
  278. 'chmod -R 777 /workspace',
  279. 'Failed to chmod the files',
  280. )
  281. return sandbox
  282. if __name__ == '__main__':
  283. biocoder_dataset = load_dataset('Lilbillbiscuit/biocoder_public')
  284. EXAMPLE_INSTANCE = biocoder_dataset['test'][0]
  285. EXAMPLE_INSTANCE = BiocoderData(**EXAMPLE_INSTANCE)
  286. sandbox = BiocoderSSHBox.get_box_for_instance(
  287. instance=EXAMPLE_INSTANCE,
  288. workspace_mount_path='/home/ubuntu/OpenDevinBioCoder/workspace',
  289. skip_workspace_mount=False,
  290. sandbox_plugins=[JupyterRequirement(), SWEAgentCommandsRequirement()],
  291. )
  292. # PRE TEST
  293. exit_code, output = sandbox.execute_and_check(
  294. 'cd /testing',
  295. 'Failed to cd /testing',
  296. )
  297. logger.info(f'cd $REPO_PATH: {output}')
  298. exit_code, output = sandbox.execute_and_check(
  299. 'whoami',
  300. 'Failed to run whoami',
  301. )
  302. logger.info(f'whoami: {output}')
  303. # TEST
  304. exit_code, output = sandbox.execute(
  305. '/home/devin/mambaforge/bin/mamba run -n test python3 /testing/start_test_opendevin.py'
  306. )
  307. assert exit_code == 0, 'Expected exit code 0 (this should have passed)'
  308. logger.info(f'$TEST_CMD:\n{output}')
  309. exit_code, output = sandbox.execute_and_check(
  310. 'cat /testing_files/results_biocoder.json', 'Failed to read the result file'
  311. )
  312. print(output)
  313. json_obj = json.loads(output)
  314. if json_obj['result'] == 'pass':
  315. print('PASS')
  316. else:
  317. print('FAIL')
  318. sys.stdout.flush()
  319. try:
  320. while True:
  321. try:
  322. user_input = input('>>> ')
  323. except EOFError:
  324. logger.info('Exiting...')
  325. break
  326. if user_input.lower() == 'exit':
  327. logger.info('Exiting...')
  328. break
  329. exit_code, output = sandbox.execute(user_input)
  330. logger.info('exit code: %d', exit_code)
  331. logger.info(output)
  332. sys.stdout.flush()
  333. except KeyboardInterrupt:
  334. logger.info('Exiting...')
  335. sandbox.close()