biocoder_env_box.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. import json
  2. import os
  3. import re
  4. import sys
  5. from collections import defaultdict
  6. from dataclasses import dataclass
  7. from datasets import load_dataset
  8. from opendevin.core.config import config
  9. from opendevin.core.logger import opendevin_logger as logger
  10. from opendevin.runtime.docker.ssh_box import DockerSSHBox
  11. from opendevin.runtime.plugins import (
  12. JupyterRequirement,
  13. PluginRequirement,
  14. SWEAgentCommandsRequirement,
  15. )
# Container image that BiocoderSSHBox.get_box_for_instance passes as
# `container_image` when it creates the sandbox.
BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0'
  17. @dataclass
  18. class BiocoderData:
  19. filePath: str
  20. numLines: int
  21. lineStart: int
  22. lineEnd: int
  23. signature: str
  24. comment: str
  25. content: str
  26. repository: str
  27. promptSummaryOnly: str
  28. contextCode: str
  29. goldenCode: str
  30. test_case_id: str
  31. language: str
  32. def to_dict(self):
  33. return {
  34. 'filePath': self.filePath,
  35. 'numLines': self.numLines,
  36. 'lineStart': self.lineStart,
  37. 'lineEnd': self.lineEnd,
  38. 'signature': self.signature,
  39. 'comment': self.comment,
  40. 'content': self.content,
  41. 'repository': self.repository,
  42. 'promptSummaryOnly': self.promptSummaryOnly,
  43. 'contextCode': self.contextCode,
  44. 'goldenCode': self.goldenCode,
  45. 'test_case_id': self.test_case_id,
  46. 'language': self.language,
  47. }
  48. def get_likely_indent_size(array_of_tabs) -> int:
  49. sizes = defaultdict(int)
  50. for i in range(len(array_of_tabs) - 1):
  51. diff = array_of_tabs[i + 1] - array_of_tabs[i]
  52. if diff > 0:
  53. sizes[diff] += 1
  54. if len(sizes) == 0:
  55. return 4
  56. return int(max(sizes, key=sizes.get))
  57. class BiocoderSSHBox(DockerSSHBox):
  58. def __init__(
  59. self,
  60. container_image: str,
  61. timeout: int = 120,
  62. sid: str | None = None,
  63. biocoder_instance_id: str | None = None,
  64. biocoder_instance: BiocoderData | None = None,
  65. skip_workspace_mount: bool = True,
  66. sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
  67. biocoder_cache_folder: str = 'biocoder_cache',
  68. workspace_dir_name: str | None = None,
  69. ):
  70. if biocoder_instance_id is None:
  71. raise ValueError('biocoder_instance_id must be provided')
  72. self.biocoder_instance_id = biocoder_instance_id
  73. self.biocoder_instance = biocoder_instance
  74. self.skip_workspace_mount = skip_workspace_mount
  75. self.biocoder_cache_folder = biocoder_cache_folder
  76. self.first_line_after_removed = None
  77. self.workspace_dir_name = workspace_dir_name
  78. self.workspace_base = config.workspace_base
  79. self.workspace_mount_path = config.workspace_mount_path
  80. # self.workspace_dir_name_host = os.path.join(config.workspace_base, workspace_dir_name)
  81. self.context_path = None
  82. self.generated_path = None
  83. self.golden_path = None
  84. assert (
  85. container_image is not None
  86. ), 'container_image is required for BiocoderBenchSSHBox!'
  87. super().__init__(container_image, timeout, sid)
  88. self.init_plugins(sandbox_plugins)
  89. @property
  90. def volumes(self):
  91. if self.skip_workspace_mount:
  92. return {
  93. k: v
  94. for k, v in super().volumes.items()
  95. if not v['bind'] == self.sandbox_workspace_dir
  96. }
  97. return super().volumes
  98. def get_target_filepath(self):
  99. target_filepath = os.path.join(
  100. self.workspace_mount_path,
  101. self.biocoder_instance.repository.split('/')[1],
  102. self.biocoder_instance.filePath,
  103. )
  104. return target_filepath
  105. def get_changed_code(self, include_signature=False):
  106. # copies changed code into /testing_files/
  107. # Note that this does NOT copy the function signature
  108. target_filepath = self.get_target_filepath()
  109. selected_lines = []
  110. offset = 1 if include_signature else 0
  111. if self.first_line_after_removed is None:
  112. logger.warning('First line after removed is None')
  113. with open(target_filepath, 'r') as f:
  114. lines = f.read().split('\n')
  115. for i in range(self.biocoder_instance.lineStart - offset, len(lines)):
  116. if lines[i].strip() == self.first_line_after_removed.strip():
  117. break
  118. selected_lines.append(lines[i])
  119. text = '\n'.join(selected_lines)
  120. return text
  121. def copy_changed_code(self):
  122. changed_code = self.get_changed_code(include_signature=True)
  123. with open(self.generated_path, 'w') as f:
  124. f.write(changed_code)
  125. exit_code, output = self.execute_and_check(
  126. f'cp -r /workspace/{self.biocoder_cache_folder}/* /testing_files',
  127. 'Failed to copy the files',
  128. )
  129. def remove_code(self):
  130. comment_prefix = {'python': '#', 'java': '//'}
  131. target_filepath = self.get_target_filepath()
  132. line_start = self.biocoder_instance.lineStart
  133. line_end = self.biocoder_instance.lineEnd
  134. with open(target_filepath, 'r') as f:
  135. lines = f.read().split('\n')
  136. # print("="*10+"ORIGINAL"+"="*10)
  137. # print("\n".join(lines))
  138. signature_line = lines[line_start - 1]
  139. # get the number of tabs
  140. def get_indent_size(s: str):
  141. return len(re.match(r'\s*', s).group())
  142. indent_sizes = list(map(get_indent_size, lines))
  143. indent_size = get_likely_indent_size(indent_sizes)
  144. comment_indent_size = get_indent_size(signature_line) + indent_size
  145. lines = (
  146. lines[:line_start]
  147. + [
  148. f"{' '*comment_indent_size+comment_prefix[self.biocoder_instance.language.lower()]}TODO: replace with your code here"
  149. ]
  150. + ([''] * 2)
  151. + lines[line_end:]
  152. )
  153. first_line_after_removed_index = line_start
  154. while len(
  155. lines[first_line_after_removed_index].strip()
  156. ) == 0 and first_line_after_removed_index < len(lines):
  157. first_line_after_removed_index += 1
  158. self.first_line_after_removed = lines[first_line_after_removed_index]
  159. # print("FIRST LINE AFTER REMOVED: ", self.first_line_after_removed)
  160. with open(target_filepath, 'w') as f:
  161. f.write('\n'.join(lines))
  162. # with open(target_filepath, 'r') as f:
  163. # print("="*10+"MODIFIED"+"="*10)
  164. # print(f.read())
  165. def execute_and_check(self, cmd: str, error_msg: str) -> tuple[int, str]:
  166. exit_code, output = self.execute(cmd)
  167. if exit_code != 0:
  168. logger.error(error_msg)
  169. sys.exit(1)
  170. return exit_code, output
  171. @classmethod
  172. def get_box_for_instance(
  173. cls,
  174. instance,
  175. workspace_dir_name=None,
  176. skip_workspace_mount: bool = False,
  177. workspace_mount_path: str | None = None,
  178. sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
  179. ) -> 'BiocoderSSHBox':
  180. """This method initializes a container image, then runs some initialization commands"""
  181. if workspace_dir_name is None:
  182. workspace_dir_name = f'{instance.repository}__{instance.test_case_id[:10]}__{os.getpid()}'.replace(
  183. '/', '__'
  184. )
  185. workspace_base = str(os.path.join(config.workspace_base, workspace_dir_name))
  186. old_workspace_base = config.workspace_base
  187. old_workspace_mount_path = config.workspace_mount_path
  188. try:
  189. config.workspace_base = workspace_base
  190. config.workspace_mount_path = workspace_base
  191. # linting python after editing helps LLM fix indentations
  192. config.enable_auto_lint = True
  193. # create folder for transferring files back/forth
  194. biocoder_cache_folder = 'biocoder_cache'
  195. if not os.path.exists(os.path.join(workspace_base, biocoder_cache_folder)):
  196. os.makedirs(
  197. os.path.join(workspace_base, biocoder_cache_folder), exist_ok=True
  198. )
  199. file_ext = {
  200. 'python': 'py',
  201. 'java': 'java',
  202. 'c': 'c',
  203. 'cpp': 'cpp',
  204. 'javascript': 'js',
  205. 'typescript': 'ts',
  206. }[instance.language.lower()]
  207. context_path = os.path.join(
  208. workspace_base, biocoder_cache_folder, 'context.' + file_ext
  209. )
  210. generated_path = os.path.join(
  211. workspace_base, biocoder_cache_folder, 'generated.' + file_ext
  212. )
  213. golden_path = os.path.join(
  214. workspace_base, biocoder_cache_folder, 'golden.' + file_ext
  215. )
  216. # print(instance.contextCode)
  217. with open(context_path, 'w') as f:
  218. f.write(instance.contextCode)
  219. with open(generated_path, 'w') as f:
  220. f.write(instance.goldenCode)
  221. with open(golden_path, 'w') as f:
  222. f.write(instance.goldenCode)
  223. testcase_json = {
  224. 'test_case_id': instance.test_case_id,
  225. 'num_cases': 1000,
  226. 'language': instance.language.lower(),
  227. }
  228. with open(
  229. os.path.join(
  230. workspace_base, biocoder_cache_folder, 'testcase_biocoder.json'
  231. ),
  232. 'w',
  233. ) as f:
  234. f.write(json.dumps(testcase_json, indent=4))
  235. # linting python after editing helps LLM fix indentations
  236. config.enable_auto_lint = True
  237. sandbox = cls(
  238. container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
  239. biocoder_instance_id=instance.test_case_id,
  240. biocoder_instance=instance,
  241. skip_workspace_mount=skip_workspace_mount,
  242. sandbox_plugins=sandbox_plugins,
  243. biocoder_cache_folder=biocoder_cache_folder,
  244. workspace_dir_name=workspace_dir_name,
  245. )
  246. except Exception:
  247. raise
  248. finally:
  249. config.workspace_base = old_workspace_base
  250. config.workspace_mount_path = old_workspace_mount_path
  251. sandbox.context_path = context_path
  252. sandbox.generated_path = generated_path
  253. sandbox.golden_path = golden_path
  254. logger.info(f'SSH box started for instance {instance.test_case_id}.')
  255. # cd to the workspace
  256. exit_code, output = sandbox.execute_and_check(
  257. 'cd /workspace', 'Failed to cd to workspace'
  258. )
  259. logger.info(f'cd to workspace: {output}')
  260. # download repository archive
  261. repository_url = f"https://biocoder.lilbillbiscuit.com/repos/{instance.repository.split('/')[1]}.zip"
  262. exit_code, output = sandbox.execute_and_check(
  263. 'wget -O repo.zip ' + repository_url, 'Failed to download the repository'
  264. )
  265. logger.info(f'Downloaded the repository: {output}')
  266. exit_code, output = sandbox.execute_and_check(
  267. 'unzip -o -q repo.zip', 'Failed to unzip the repository'
  268. )
  269. logger.info(f'Unzipped the repository: {output}')
  270. # copy the context, generated and golden files to the /testing_files folder
  271. exit_code, output = sandbox.execute_and_check(
  272. f'cp -r /workspace/{biocoder_cache_folder}/* /testing_files',
  273. 'Failed to copy the files',
  274. )
  275. # chmod 777
  276. exit_code, output = sandbox.execute_and_check(
  277. 'chmod -R 777 /workspace',
  278. 'Failed to chmod the files',
  279. )
  280. return sandbox
  281. if __name__ == '__main__':
  282. biocoder_dataset = load_dataset('Lilbillbiscuit/biocoder_public')
  283. EXAMPLE_INSTANCE = biocoder_dataset['test'][0]
  284. EXAMPLE_INSTANCE = BiocoderData(**EXAMPLE_INSTANCE)
  285. sandbox = BiocoderSSHBox.get_box_for_instance(
  286. instance=EXAMPLE_INSTANCE,
  287. workspace_mount_path='/home/ubuntu/OpenDevinBioCoder/workspace',
  288. skip_workspace_mount=False,
  289. sandbox_plugins=[JupyterRequirement(), SWEAgentCommandsRequirement()],
  290. )
  291. # PRE TEST
  292. exit_code, output = sandbox.execute_and_check(
  293. 'cd /testing',
  294. 'Failed to cd /testing',
  295. )
  296. logger.info(f'cd $REPO_PATH: {output}')
  297. exit_code, output = sandbox.execute_and_check(
  298. 'whoami',
  299. 'Failed to run whoami',
  300. )
  301. logger.info(f'whoami: {output}')
  302. # TEST
  303. exit_code, output = sandbox.execute(
  304. '/home/devin/mambaforge/bin/mamba run -n test python3 /testing/start_test_opendevin.py'
  305. )
  306. assert exit_code == 0, 'Expected exit code 0 (this should have passed)'
  307. logger.info(f'$TEST_CMD:\n{output}')
  308. exit_code, output = sandbox.execute_and_check(
  309. 'cat /testing_files/results_biocoder.json', 'Failed to read the result file'
  310. )
  311. print(output)
  312. json_obj = json.loads(output)
  313. if json_obj['result'] == 'pass':
  314. print('PASS')
  315. else:
  316. print('FAIL')
  317. bg_cmd = sandbox.execute_in_background(
  318. "while true; do echo 'dot ' && sleep 10; done"
  319. )
  320. sys.stdout.flush()
  321. try:
  322. while True:
  323. try:
  324. user_input = input('>>> ')
  325. except EOFError:
  326. logger.info('Exiting...')
  327. break
  328. if user_input.lower() == 'exit':
  329. logger.info('Exiting...')
  330. break
  331. if user_input.lower() == 'kill':
  332. sandbox.kill_background(bg_cmd.pid)
  333. logger.info('Background process killed')
  334. continue
  335. exit_code, output = sandbox.execute(user_input)
  336. logger.info('exit code: %d', exit_code)
  337. logger.info(output)
  338. if bg_cmd.pid in sandbox.background_commands:
  339. logs = sandbox.read_logs(bg_cmd.pid)
  340. logger.info('background logs: %s', logs)
  341. sys.stdout.flush()
  342. except KeyboardInterrupt:
  343. logger.info('Exiting...')
  344. sandbox.close()