# config.py
  1. import argparse
  2. import logging
  3. import os
  4. import pathlib
  5. import platform
  6. import toml
  7. from dotenv import load_dotenv
  8. from opendevin.schema import ConfigType
  9. logger = logging.getLogger(__name__)
  10. DEFAULT_CONTAINER_IMAGE = 'ghcr.io/opendevin/sandbox'
  11. if os.getenv('OPEN_DEVIN_BUILD_VERSION'):
  12. DEFAULT_CONTAINER_IMAGE += ':' + (os.getenv('OPEN_DEVIN_BUILD_VERSION') or '')
  13. else:
  14. DEFAULT_CONTAINER_IMAGE += ':main'
  15. load_dotenv()
  16. DEFAULT_CONFIG: dict = {
  17. ConfigType.LLM_API_KEY: None,
  18. ConfigType.LLM_BASE_URL: None,
  19. ConfigType.WORKSPACE_BASE: os.getcwd(),
  20. ConfigType.WORKSPACE_MOUNT_PATH: None,
  21. ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX: '/workspace',
  22. ConfigType.WORKSPACE_MOUNT_REWRITE: None,
  23. ConfigType.CACHE_DIR: '/tmp/cache', # '/tmp/cache' is the default cache directory
  24. ConfigType.LLM_MODEL: 'gpt-3.5-turbo-1106',
  25. ConfigType.SANDBOX_CONTAINER_IMAGE: DEFAULT_CONTAINER_IMAGE,
  26. ConfigType.RUN_AS_DEVIN: 'true',
  27. ConfigType.LLM_EMBEDDING_MODEL: 'local',
  28. ConfigType.LLM_EMBEDDING_BASE_URL: None,
  29. ConfigType.LLM_EMBEDDING_DEPLOYMENT_NAME: None,
  30. ConfigType.LLM_API_VERSION: None,
  31. ConfigType.LLM_NUM_RETRIES: 5,
  32. ConfigType.LLM_RETRY_MIN_WAIT: 3,
  33. ConfigType.LLM_RETRY_MAX_WAIT: 60,
  34. ConfigType.MAX_ITERATIONS: 100,
  35. ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
  36. ConfigType.AGENT_MEMORY_ENABLED: False,
  37. ConfigType.LLM_TIMEOUT: None,
  38. ConfigType.LLM_MAX_RETURN_TOKENS: None,
  39. # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
  40. # we cannot easily count number of tokens, but we can count characters.
  41. # Assuming 5 characters per token, 5 million is a reasonable default limit.
  42. ConfigType.MAX_CHARS: 5_000_000,
  43. ConfigType.AGENT: 'MonologueAgent',
  44. ConfigType.E2B_API_KEY: '',
  45. ConfigType.SANDBOX_TYPE: 'ssh', # Can be 'ssh', 'exec', or 'e2b'
  46. ConfigType.USE_HOST_NETWORK: 'false',
  47. ConfigType.SSH_HOSTNAME: 'localhost',
  48. ConfigType.DISABLE_COLOR: 'false',
  49. ConfigType.SANDBOX_USER_ID: os.getuid() if hasattr(os, 'getuid') else None,
  50. ConfigType.SANDBOX_TIMEOUT: 120,
  51. ConfigType.GITHUB_TOKEN: None,
  52. ConfigType.SANDBOX_USER_ID: None
  53. }
  54. config_str = ''
  55. if os.path.exists('config.toml'):
  56. with open('config.toml', 'rb') as f:
  57. config_str = f.read().decode('utf-8')
  58. def int_value(value, default, config_key):
  59. # FIXME use a library
  60. try:
  61. return int(value)
  62. except ValueError:
  63. logger.warning(f'Invalid value for {config_key}: {value} not applied. Using default value {default}')
  64. return default
  65. tomlConfig = toml.loads(config_str)
  66. config = DEFAULT_CONFIG.copy()
  67. for k, v in config.items():
  68. if k in os.environ:
  69. config[k] = os.environ[k]
  70. elif k in tomlConfig:
  71. config[k] = tomlConfig[k]
  72. if k in [ConfigType.LLM_NUM_RETRIES, ConfigType.LLM_RETRY_MIN_WAIT, ConfigType.LLM_RETRY_MAX_WAIT]:
  73. config[k] = int_value(config[k], v, config_key=k)
  74. # In local there is no sandbox, the workspace will have the same pwd as the host
  75. if config[ConfigType.SANDBOX_TYPE] == 'local':
  76. config[ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX] = config[ConfigType.WORKSPACE_MOUNT_PATH]
  77. def get_parser():
  78. parser = argparse.ArgumentParser(
  79. description='Run an agent with a specific task')
  80. parser.add_argument(
  81. '-d',
  82. '--directory',
  83. type=str,
  84. help='The working directory for the agent',
  85. )
  86. parser.add_argument(
  87. '-t', '--task', type=str, default='', help='The task for the agent to perform'
  88. )
  89. parser.add_argument(
  90. '-f',
  91. '--file',
  92. type=str,
  93. help='Path to a file containing the task. Overrides -t if both are provided.',
  94. )
  95. parser.add_argument(
  96. '-c',
  97. '--agent-cls',
  98. default=config.get(ConfigType.AGENT),
  99. type=str,
  100. help='The agent class to use',
  101. )
  102. parser.add_argument(
  103. '-m',
  104. '--model-name',
  105. default=config.get(ConfigType.LLM_MODEL),
  106. type=str,
  107. help='The (litellm) model name to use',
  108. )
  109. parser.add_argument(
  110. '-i',
  111. '--max-iterations',
  112. default=config.get(ConfigType.MAX_ITERATIONS),
  113. type=int,
  114. help='The maximum number of iterations to run the agent',
  115. )
  116. parser.add_argument(
  117. '-n',
  118. '--max-chars',
  119. default=config.get(ConfigType.MAX_CHARS),
  120. type=int,
  121. help='The maximum number of characters to send to and receive from LLM per task',
  122. )
  123. return parser
  124. def parse_arguments():
  125. parser = get_parser()
  126. args, _ = parser.parse_known_args()
  127. if args.directory:
  128. config[ConfigType.WORKSPACE_BASE] = os.path.abspath(args.directory)
  129. print(f'Setting workspace base to {config[ConfigType.WORKSPACE_BASE]}')
  130. return args
# Parse CLI arguments at import time so a --directory override lands in
# `config` before finalize_config() runs below.
args = parse_arguments()
  132. def finalize_config():
  133. if config.get(ConfigType.WORKSPACE_MOUNT_REWRITE) and not config.get(ConfigType.WORKSPACE_MOUNT_PATH):
  134. base = config.get(ConfigType.WORKSPACE_BASE) or os.getcwd()
  135. parts = config[ConfigType.WORKSPACE_MOUNT_REWRITE].split(':')
  136. config[ConfigType.WORKSPACE_MOUNT_PATH] = base.replace(parts[0], parts[1])
  137. if config.get(ConfigType.WORKSPACE_MOUNT_PATH) is None:
  138. config[ConfigType.WORKSPACE_MOUNT_PATH] = os.path.abspath(config[ConfigType.WORKSPACE_BASE])
  139. if config.get(ConfigType.LLM_EMBEDDING_BASE_URL) is None:
  140. config[ConfigType.LLM_EMBEDDING_BASE_URL] = config.get(ConfigType.LLM_BASE_URL)
  141. USE_HOST_NETWORK = config[ConfigType.USE_HOST_NETWORK].lower() != 'false'
  142. if USE_HOST_NETWORK and platform.system() == 'Darwin':
  143. logger.warning(
  144. 'Please upgrade to Docker Desktop 4.29.0 or later to use host network mode on macOS. '
  145. 'See https://github.com/docker/roadmap/issues/238#issuecomment-2044688144 for more information.'
  146. )
  147. config[ConfigType.USE_HOST_NETWORK] = USE_HOST_NETWORK
  148. if config.get(ConfigType.WORKSPACE_MOUNT_PATH) is None:
  149. config[ConfigType.WORKSPACE_MOUNT_PATH] = config.get(ConfigType.WORKSPACE_BASE)
  150. finalize_config()
  151. def get(key: ConfigType, required: bool = False):
  152. """
  153. Get a key from the environment variables or config.toml or default configs.
  154. """
  155. if not isinstance(key, ConfigType):
  156. raise ValueError(f"key '{key}' must be an instance of ConfigType Enum")
  157. value = config.get(key)
  158. if not value and required:
  159. raise KeyError(f"Please set '{key}' in `config.toml` or `.env`.")
  160. return value
  161. _cache_dir = config.get(ConfigType.CACHE_DIR)
  162. if _cache_dir:
  163. pathlib.Path(_cache_dir).mkdir(parents=True, exist_ok=True)