test_bash_parsing.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. import pytest
  2. from openhands.runtime.utils.bash import split_bash_commands
  3. def test_split_commands_util():
  4. cmds = [
  5. 'ls -l',
  6. 'echo -e "hello\nworld"',
  7. """
  8. echo -e "hello it\\'s me"
  9. """.strip(),
  10. """
  11. echo \\
  12. -e 'hello' \\
  13. -v
  14. """.strip(),
  15. """
  16. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  17. """.strip(),
  18. """
  19. echo -e 'hello
  20. world
  21. are
  22. you\\n
  23. there?'
  24. """.strip(),
  25. """
  26. echo -e 'hello
  27. world "
  28. '
  29. """.strip(),
  30. """
  31. kubectl apply -f - <<EOF
  32. apiVersion: v1
  33. kind: Pod
  34. metadata:
  35. name: busybox-sleep
  36. spec:
  37. containers:
  38. - name: busybox
  39. image: busybox:1.28
  40. args:
  41. - sleep
  42. - "1000000"
  43. EOF
  44. """.strip(),
  45. """
  46. mkdir -p _modules && \
  47. for month in {01..04}; do
  48. for day in {01..05}; do
  49. touch "_modules/2024-${month}-${day}-sample.md"
  50. done
  51. done
  52. """.strip(),
  53. ]
  54. joined_cmds = '\n'.join(cmds)
  55. split_cmds = split_bash_commands(joined_cmds)
  56. for s in split_cmds:
  57. print('\nCMD')
  58. print(s)
  59. for i in range(len(cmds)):
  60. assert (
  61. split_cmds[i].strip() == cmds[i].strip()
  62. ), f'At index {i}: {split_cmds[i]} != {cmds[i]}.'
  63. @pytest.mark.parametrize(
  64. 'input_command, expected_output',
  65. [
  66. ('ls -l', ['ls -l']),
  67. ("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
  68. ('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
  69. ("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
  70. (
  71. "grep 'pattern' file.txt | sort | uniq",
  72. ["grep 'pattern' file.txt | sort | uniq"],
  73. ),
  74. ('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
  75. (
  76. "echo 'Single quotes don\\'t escape'",
  77. ["echo 'Single quotes don\\'t escape'"],
  78. ),
  79. (
  80. 'echo "Double quotes \\"do\\" escape"',
  81. ['echo "Double quotes \\"do\\" escape"'],
  82. ),
  83. ],
  84. )
  85. def test_single_commands(input_command, expected_output):
  86. assert split_bash_commands(input_command) == expected_output
  87. def test_heredoc():
  88. input_commands = """
  89. cat <<EOF
  90. multiline
  91. text
  92. EOF
  93. echo "Done"
  94. """
  95. expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
  96. assert split_bash_commands(input_commands) == expected_output
  97. def test_backslash_continuation():
  98. input_commands = """
  99. echo "This is a long \
  100. command that spans \
  101. multiple lines"
  102. echo "Next command"
  103. """
  104. expected_output = [
  105. 'echo "This is a long command that spans multiple lines"',
  106. 'echo "Next command"',
  107. ]
  108. assert split_bash_commands(input_commands) == expected_output
  109. def test_comments():
  110. input_commands = """
  111. echo "Hello" # This is a comment
  112. # This is another comment
  113. ls -l
  114. """
  115. expected_output = [
  116. 'echo "Hello" # This is a comment\n# This is another comment',
  117. 'ls -l',
  118. ]
  119. assert split_bash_commands(input_commands) == expected_output
  120. def test_complex_quoting():
  121. input_commands = """
  122. echo "This is a \\"quoted\\" string"
  123. echo 'This is a '\''single-quoted'\'' string'
  124. echo "Mixed 'quotes' in \\"double quotes\\""
  125. """
  126. expected_output = [
  127. 'echo "This is a \\"quoted\\" string"',
  128. "echo 'This is a '''single-quoted''' string'",
  129. 'echo "Mixed \'quotes\' in \\"double quotes\\""',
  130. ]
  131. assert split_bash_commands(input_commands) == expected_output
  132. def test_invalid_syntax():
  133. invalid_inputs = [
  134. 'echo "Unclosed quote',
  135. "echo 'Unclosed quote",
  136. 'cat <<EOF\nUnclosed heredoc',
  137. ]
  138. for input_command in invalid_inputs:
  139. # it will fall back to return the original input
  140. assert split_bash_commands(input_command) == [input_command]
  141. @pytest.fixture
  142. def sample_commands():
  143. return [
  144. 'ls -l',
  145. 'echo "Hello, world!"',
  146. 'cd /tmp && touch test.txt',
  147. 'echo -e "line1\\nline2\\nline3"',
  148. 'grep "pattern" file.txt | sort | uniq',
  149. 'for i in {1..5}; do echo $i; done',
  150. 'cat <<EOF\nmultiline\ntext\nEOF',
  151. 'echo "Escaped \\"quotes\\""',
  152. "echo 'Single quotes don\\'t escape'",
  153. 'echo "Command with a trailing backslash \\\n and continuation"',
  154. ]
  155. def test_split_single_commands(sample_commands):
  156. for cmd in sample_commands:
  157. result = split_bash_commands(cmd)
  158. assert len(result) == 1, f'Expected single command, got: {result}'
  159. def test_split_commands_with_heredoc():
  160. input_commands = """
  161. cat <<EOF
  162. multiline
  163. text
  164. EOF
  165. echo "Done"
  166. """
  167. expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
  168. result = split_bash_commands(input_commands)
  169. assert result == expected_output, f'Expected {expected_output}, got {result}'
  170. def test_split_commands_with_backslash_continuation():
  171. input_commands = """
  172. echo "This is a long \
  173. command that spans \
  174. multiple lines"
  175. echo "Next command"
  176. """
  177. expected_output = [
  178. 'echo "This is a long command that spans multiple lines"',
  179. 'echo "Next command"',
  180. ]
  181. result = split_bash_commands(input_commands)
  182. assert result == expected_output, f'Expected {expected_output}, got {result}'
  183. def test_split_commands_with_empty_lines():
  184. input_commands = """
  185. ls -l
  186. echo "Hello"
  187. cd /tmp
  188. """
  189. expected_output = ['ls -l', 'echo "Hello"', 'cd /tmp']
  190. result = split_bash_commands(input_commands)
  191. assert result == expected_output, f'Expected {expected_output}, got {result}'
  192. def test_split_commands_with_comments():
  193. input_commands = """
  194. echo "Hello" # This is a comment
  195. # This is another comment
  196. ls -l
  197. """
  198. expected_output = [
  199. 'echo "Hello" # This is a comment\n# This is another comment',
  200. 'ls -l',
  201. ]
  202. result = split_bash_commands(input_commands)
  203. assert result == expected_output, f'Expected {expected_output}, got {result}'
  204. def test_split_commands_with_complex_quoting():
  205. input_commands = """
  206. echo "This is a \\"quoted\\" string"
  207. echo "Mixed 'quotes' in \\"double quotes\\""
  208. """
  209. # echo 'This is a '\''single-quoted'\'' string'
  210. expected_output = [
  211. 'echo "This is a \\"quoted\\" string"',
  212. 'echo "Mixed \'quotes\' in \\"double quotes\\""',
  213. ]
  214. # "echo 'This is a '\\''single-quoted'\\'' string'",
  215. result = split_bash_commands(input_commands)
  216. assert result == expected_output, f'Expected {expected_output}, got {result}'
  217. def test_split_commands_with_invalid_input():
  218. invalid_inputs = [
  219. 'echo "Unclosed quote',
  220. "echo 'Unclosed quote",
  221. 'cat <<EOF\nUnclosed heredoc',
  222. ]
  223. for input_command in invalid_inputs:
  224. # it will fall back to return the original input
  225. assert split_bash_commands(input_command) == [input_command]