test_bash_parsing.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. import pytest
  2. from opendevin.runtime.utils.bash import split_bash_commands
  3. def test_split_commands_util():
  4. cmds = [
  5. 'ls -l',
  6. 'echo -e "hello\nworld"',
  7. """
  8. echo -e "hello it\\'s me"
  9. """.strip(),
  10. """
  11. echo \\
  12. -e 'hello' \\
  13. -v
  14. """.strip(),
  15. """
  16. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  17. """.strip(),
  18. """
  19. echo -e 'hello
  20. world
  21. are
  22. you\\n
  23. there?'
  24. """.strip(),
  25. """
  26. echo -e 'hello
  27. world "
  28. '
  29. """.strip(),
  30. """
  31. kubectl apply -f - <<EOF
  32. apiVersion: v1
  33. kind: Pod
  34. metadata:
  35. name: busybox-sleep
  36. spec:
  37. containers:
  38. - name: busybox
  39. image: busybox:1.28
  40. args:
  41. - sleep
  42. - "1000000"
  43. EOF
  44. """.strip(),
  45. """
  46. mkdir -p _modules && \
  47. for month in {01..04}; do
  48. for day in {01..05}; do
  49. touch "_modules/2024-${month}-${day}-sample.md"
  50. done
  51. done
  52. """.strip(),
  53. ]
  54. joined_cmds = '\n'.join(cmds)
  55. split_cmds = split_bash_commands(joined_cmds)
  56. for s in split_cmds:
  57. print('\nCMD')
  58. print(s)
  59. for i in range(len(cmds)):
  60. assert (
  61. split_cmds[i].strip() == cmds[i].strip()
  62. ), f'At index {i}: {split_cmds[i]} != {cmds[i]}.'
  63. @pytest.mark.parametrize(
  64. 'input_command, expected_output',
  65. [
  66. ('ls -l', ['ls -l']),
  67. ("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
  68. ('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
  69. ("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
  70. (
  71. "grep 'pattern' file.txt | sort | uniq",
  72. ["grep 'pattern' file.txt | sort | uniq"],
  73. ),
  74. ('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
  75. (
  76. "echo 'Single quotes don\\'t escape'",
  77. ["echo 'Single quotes don\\'t escape'"],
  78. ),
  79. (
  80. 'echo "Double quotes \\"do\\" escape"',
  81. ['echo "Double quotes \\"do\\" escape"'],
  82. ),
  83. ],
  84. )
  85. def test_single_commands(input_command, expected_output):
  86. assert split_bash_commands(input_command) == expected_output
  87. def test_heredoc():
  88. input_commands = """
  89. cat <<EOF
  90. multiline
  91. text
  92. EOF
  93. echo "Done"
  94. """
  95. expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
  96. assert split_bash_commands(input_commands) == expected_output
  97. def test_jupyter_heredoc():
  98. """This tests specifically test the behavior of the bash parser
  99. when the input is a heredoc for a Jupyter cell (used in ServerRuntime).
  100. It will failed to parse bash commands AND fall back to the original input,
  101. which won't cause issues in actual execution.
  102. [input]: cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
  103. print('Hello, `World`!
  104. ')
  105. EOL
  106. [error]: here-document at line 0 delimited by end-of-file (wanted "'EOL'") (position 75)
  107. TODO: remove this tests after the deprecation of ServerRuntime
  108. """
  109. code = "print('Hello, `World`!\n')"
  110. input_commands = f"""cat > /tmp/opendevin_jupyter_temp.py <<'EOL'
  111. {code}
  112. EOL"""
  113. expected_output = [f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{code}\nEOL"]
  114. assert split_bash_commands(input_commands) == expected_output
  115. def test_backslash_continuation():
  116. input_commands = """
  117. echo "This is a long \
  118. command that spans \
  119. multiple lines"
  120. echo "Next command"
  121. """
  122. expected_output = [
  123. 'echo "This is a long command that spans multiple lines"',
  124. 'echo "Next command"',
  125. ]
  126. assert split_bash_commands(input_commands) == expected_output
  127. def test_comments():
  128. input_commands = """
  129. echo "Hello" # This is a comment
  130. # This is another comment
  131. ls -l
  132. """
  133. expected_output = [
  134. 'echo "Hello" # This is a comment\n# This is another comment',
  135. 'ls -l',
  136. ]
  137. assert split_bash_commands(input_commands) == expected_output
  138. def test_complex_quoting():
  139. input_commands = """
  140. echo "This is a \\"quoted\\" string"
  141. echo 'This is a '\''single-quoted'\'' string'
  142. echo "Mixed 'quotes' in \\"double quotes\\""
  143. """
  144. expected_output = [
  145. 'echo "This is a \\"quoted\\" string"',
  146. "echo 'This is a '''single-quoted''' string'",
  147. 'echo "Mixed \'quotes\' in \\"double quotes\\""',
  148. ]
  149. assert split_bash_commands(input_commands) == expected_output
  150. def test_invalid_syntax():
  151. invalid_inputs = [
  152. 'echo "Unclosed quote',
  153. "echo 'Unclosed quote",
  154. 'cat <<EOF\nUnclosed heredoc',
  155. ]
  156. for input_command in invalid_inputs:
  157. # it will fall back to return the original input
  158. assert split_bash_commands(input_command) == [input_command]
  159. @pytest.fixture
  160. def sample_commands():
  161. return [
  162. 'ls -l',
  163. 'echo "Hello, world!"',
  164. 'cd /tmp && touch test.txt',
  165. 'echo -e "line1\\nline2\\nline3"',
  166. 'grep "pattern" file.txt | sort | uniq',
  167. 'for i in {1..5}; do echo $i; done',
  168. 'cat <<EOF\nmultiline\ntext\nEOF',
  169. 'echo "Escaped \\"quotes\\""',
  170. "echo 'Single quotes don\\'t escape'",
  171. 'echo "Command with a trailing backslash \\\n and continuation"',
  172. ]
  173. def test_split_single_commands(sample_commands):
  174. for cmd in sample_commands:
  175. result = split_bash_commands(cmd)
  176. assert len(result) == 1, f'Expected single command, got: {result}'
  177. def test_split_commands_with_heredoc():
  178. input_commands = """
  179. cat <<EOF
  180. multiline
  181. text
  182. EOF
  183. echo "Done"
  184. """
  185. expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
  186. result = split_bash_commands(input_commands)
  187. assert result == expected_output, f'Expected {expected_output}, got {result}'
  188. def test_split_commands_with_backslash_continuation():
  189. input_commands = """
  190. echo "This is a long \
  191. command that spans \
  192. multiple lines"
  193. echo "Next command"
  194. """
  195. expected_output = [
  196. 'echo "This is a long command that spans multiple lines"',
  197. 'echo "Next command"',
  198. ]
  199. result = split_bash_commands(input_commands)
  200. assert result == expected_output, f'Expected {expected_output}, got {result}'
  201. def test_split_commands_with_empty_lines():
  202. input_commands = """
  203. ls -l
  204. echo "Hello"
  205. cd /tmp
  206. """
  207. expected_output = ['ls -l', 'echo "Hello"', 'cd /tmp']
  208. result = split_bash_commands(input_commands)
  209. assert result == expected_output, f'Expected {expected_output}, got {result}'
  210. def test_split_commands_with_comments():
  211. input_commands = """
  212. echo "Hello" # This is a comment
  213. # This is another comment
  214. ls -l
  215. """
  216. expected_output = [
  217. 'echo "Hello" # This is a comment\n# This is another comment',
  218. 'ls -l',
  219. ]
  220. result = split_bash_commands(input_commands)
  221. assert result == expected_output, f'Expected {expected_output}, got {result}'
  222. def test_split_commands_with_complex_quoting():
  223. input_commands = """
  224. echo "This is a \\"quoted\\" string"
  225. echo "Mixed 'quotes' in \\"double quotes\\""
  226. """
  227. # echo 'This is a '\''single-quoted'\'' string'
  228. expected_output = [
  229. 'echo "This is a \\"quoted\\" string"',
  230. 'echo "Mixed \'quotes\' in \\"double quotes\\""',
  231. ]
  232. # "echo 'This is a '\\''single-quoted'\\'' string'",
  233. result = split_bash_commands(input_commands)
  234. assert result == expected_output, f'Expected {expected_output}, got {result}'
  235. def test_split_commands_with_invalid_input():
  236. invalid_inputs = [
  237. 'echo "Unclosed quote',
  238. "echo 'Unclosed quote",
  239. 'cat <<EOF\nUnclosed heredoc',
  240. ]
  241. for input_command in invalid_inputs:
  242. # it will fall back to return the original input
  243. assert split_bash_commands(input_command) == [input_command]