feat: add metrics related to cost for better observability (#1944)

* add metrics for total_cost

* make lint

* refactor codeact

* move metrics into llm

* add costs list, add it to state

* refactor log completion

* refactor and test others

* make lint

* Update opendevin/core/metrics.py

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>

* Update opendevin/llm/llm.py

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>

* refactor

* add code

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
Yufan Song, 1 year ago
parent commit d18e6c85a0

+ 1 - 1
agenthub/SWE_agent/agent.py

@@ -42,7 +42,7 @@ class SWEAgent(Agent):
         self.running_memory.append(memory)
 
     def _think_act(self, messages: list[dict]) -> tuple[Action, str]:
-        resp = self.llm.completion(
+        resp = self.llm.do_completion(
             messages=messages,
             temperature=0.05,
         )

+ 1 - 17
agenthub/codeact_agent/codeact_agent.py

@@ -9,7 +9,6 @@ from agenthub.codeact_agent.prompt import (
 )
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
-from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action import (
     Action,
     AgentFinishAction,
@@ -173,7 +172,6 @@ class CodeActAgent(Agent):
         Resets the CodeAct Agent.
         """
         super().reset()
-        self.cost_accumulator = 0
 
     def step(self, state: State) -> Action:
         """
@@ -215,7 +213,7 @@ class CodeActAgent(Agent):
                 f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
             )
 
-        response = self.llm.completion(
+        response = self.llm.do_completion(
             messages=messages,
             stop=[
                 '</execute_ipython>',
@@ -225,8 +223,6 @@ class CodeActAgent(Agent):
             temperature=0.0,
         )
 
-        self.log_cost(response)
-
         action_str: str = parse_response(response)
         state.num_of_chars += sum(
             len(message['content']) for message in messages
@@ -269,15 +265,3 @@ class CodeActAgent(Agent):
 
     def search_memory(self, query: str) -> list[str]:
         raise NotImplementedError('Implement this abstract method')
-
-    def log_cost(self, response):
-        try:
-            cur_cost = self.llm.completion_cost(response)
-        except Exception:
-            cur_cost = 0
-        self.cost_accumulator += cur_cost
-        logger.info(
-            'Cost: %.2f USD | Accumulated Cost: %.2f USD',
-            cur_cost,
-            self.cost_accumulator,
-        )

+ 1 - 1
agenthub/micro/agent.py

@@ -65,7 +65,7 @@ class MicroAgent(Agent):
             latest_user_message=latest_user_message,
         )
         messages = [{'content': prompt, 'role': 'user'}]
-        resp = self.llm.completion(messages=messages)
+        resp = self.llm.do_completion(messages=messages)
         action_resp = resp['choices'][0]['message']['content']
         state.num_of_chars += len(prompt) + len(action_resp)
         action = parse_response(action_resp)

+ 1 - 1
agenthub/monologue_agent/agent.py

@@ -242,7 +242,7 @@ class MonologueAgent(Agent):
             state.background_commands_obs,
         )
         messages = [{'content': prompt, 'role': 'user'}]
-        resp = self.llm.completion(messages=messages)
+        resp = self.llm.do_completion(messages=messages)
         action_resp = resp['choices'][0]['message']['content']
         state.num_of_chars += len(prompt) + len(action_resp)
         action = prompts.parse_action_response(action_resp)

+ 1 - 1
agenthub/planner_agent/agent.py

@@ -43,7 +43,7 @@ class PlannerAgent(Agent):
             return AgentFinishAction()
         prompt = get_prompt(state)
         messages = [{'content': prompt, 'role': 'user'}]
-        resp = self.llm.completion(messages=messages)
+        resp = self.llm.do_completion(messages=messages)
         action_resp = resp['choices'][0]['message']['content']
         state.num_of_chars += len(prompt) + len(action_resp)
         action = parse_response(action_resp)

+ 2 - 0
opendevin/controller/agent_controller.py

@@ -89,6 +89,8 @@ class AgentController:
 
     def update_state_after_step(self):
         self.state.updated_info = []
+        # sync metrics from the LLM, in particular the accumulated cost
+        self.state.metrics = self.agent.llm.metrics
 
     async def report_error(self, message: str, exception: Exception | None = None):
         self.state.error = message
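
Note that this assignment copies a reference, not a value: `state.metrics` and `agent.llm.metrics` point at the same Metrics object afterwards, so costs recorded by later completions show up in the state without further bookkeeping. A minimal sketch of that behavior (the FakeLLM stub is hypothetical):

    from opendevin.core.metrics import Metrics

    class FakeLLM:
        def __init__(self):
            self.metrics = Metrics()

    llm = FakeLLM()
    state_metrics = llm.metrics             # what update_state_after_step() effectively does
    llm.metrics.add_cost(0.01)              # a later completion records its cost
    print(state_metrics.accumulated_cost)   # 0.01 -- same object, updated in place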

+ 2 - 0
opendevin/controller/state/state.py

@@ -4,6 +4,7 @@ from dataclasses import dataclass, field
 
 from opendevin.controller.state.task import RootTask
 from opendevin.core.logger import opendevin_logger as logger
+from opendevin.core.metrics import Metrics
 from opendevin.core.schema import AgentState
 from opendevin.events.action import (
     Action,
@@ -30,6 +31,7 @@ class State:
     outputs: dict = field(default_factory=dict)
     error: str | None = None
     agent_state: AgentState = AgentState.LOADING
+    metrics: Metrics = field(default_factory=Metrics)
 
     def save_to_session(self, sid: str):
         fs = get_file_store()
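
With a dataclass, `field(default_factory=Metrics)` gives each State its own Metrics instance; a plain `Metrics()` default would be created once at class-definition time and shared by every State. A stripped-down illustration of the per-instance behavior (DemoState is hypothetical and shows only the metrics field):

    from dataclasses import dataclass, field

    from opendevin.core.metrics import Metrics

    @dataclass
    class DemoState:
        metrics: Metrics = field(default_factory=Metrics)  # fresh instance per object

    a, b = DemoState(), DemoState()
    a.metrics.add_cost(0.01)
    print(b.metrics.accumulated_cost)  # 0.0 -- b has its own Metrics object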

+ 46 - 0
opendevin/core/metrics.py

@@ -0,0 +1,46 @@
+class Metrics:
+    """
+    Metrics records measurements taken while the agent runs and during evaluation.
+    Currently the following metrics are defined:
+        accumulated_cost: the total cost (in USD) accrued by the current LLM.
+    """
+
+    def __init__(self) -> None:
+        self._accumulated_cost: float = 0.0
+        self._costs: list[float] = []
+
+    @property
+    def accumulated_cost(self) -> float:
+        return self._accumulated_cost
+
+    @accumulated_cost.setter
+    def accumulated_cost(self, value: float) -> None:
+        if value < 0:
+            raise ValueError('Total cost cannot be negative.')
+        self._accumulated_cost = value
+
+    @property
+    def costs(self) -> list:
+        return self._costs
+
+    def add_cost(self, value: float) -> None:
+        if value < 0:
+            raise ValueError('Added cost cannot be negative.')
+        self._accumulated_cost += value
+        self._costs.append(value)
+
+    def get(self) -> dict:
+        """
+        Return the metrics in a dictionary.
+        """
+        return {'accumulated_cost': self._accumulated_cost, 'costs': self._costs}
+
+    def log(self) -> str:
+        """
+        Return the metrics as a 'key: value' string suitable for logging.
+        """
+        metrics = self.get()
+        logs = ''
+        for key, value in metrics.items():
+            logs += f'{key}: {value}\n'
+        return logs
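
A minimal usage sketch of the new Metrics class (the cost values are made up; real values come from litellm's pricing data):

    from opendevin.core.metrics import Metrics

    metrics = Metrics()
    metrics.add_cost(0.012)            # cost of one completion, in USD
    metrics.add_cost(0.034)            # cost of a second completion

    print(metrics.accumulated_cost)    # ~0.046, the running total
    print(metrics.costs)               # [0.012, 0.034], each individual charge
    print(metrics.get())               # {'accumulated_cost': ..., 'costs': [...]}
    print(metrics.log(), end='')       # one 'key: value' line per metric

    # metrics.add_cost(-1.0)           # would raise ValueError('Added cost cannot be negative.')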

+ 31 - 1
opendevin/llm/llm.py

@@ -21,6 +21,7 @@ from tenacity import (
 from opendevin.core.config import config
 from opendevin.core.logger import llm_prompt_logger, llm_response_logger
 from opendevin.core.logger import opendevin_logger as logger
+from opendevin.core.metrics import Metrics
 
 __all__ = ['LLM']
 
@@ -58,6 +59,7 @@ class LLM:
         max_input_tokens=None,
         max_output_tokens=None,
         llm_config=None,
+        metrics=None,
     ):
         """
         Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
@@ -77,7 +79,7 @@ class LLM:
             custom_llm_provider (str, optional): A custom LLM provider. Defaults to LLM_CUSTOM_LLM_PROVIDER.
             llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT.
             llm_temperature (float, optional): The temperature for LLM sampling. Defaults to LLM_TEMPERATURE.
-
+            metrics (Metrics, optional): The metrics to record costs into. Defaults to None, in which case a fresh Metrics instance is created.
         """
         if llm_config is None:
             llm_config = config.llm
@@ -112,6 +114,7 @@ class LLM:
             if max_output_tokens is not None
             else llm_config.max_output_tokens
         )
+        metrics = metrics if metrics is not None else Metrics()
 
         logger.info(f'Initializing LLM with model: {model}')
         self.model_name = model
@@ -122,6 +125,7 @@ class LLM:
         self.max_output_tokens = max_output_tokens
         self.llm_timeout = llm_timeout
         self.custom_llm_provider = custom_llm_provider
+        self.metrics = metrics
 
         # litellm actually uses base Exception here for unknown model
         self.model_info = None
@@ -200,6 +204,30 @@ class LLM:
         """
         return self._completion
 
+    def do_completion(self, *args, **kwargs):
+        """
+        Wrapper for the litellm completion function.
+
+        Check the complete documentation at https://litellm.vercel.app/docs/completion
+        """
+        resp = self._completion(*args, **kwargs)
+        self.post_completion(resp)
+        return resp
+
+    def post_completion(self, response) -> None:
+        """
+        Post-process a completion response: compute its cost and log the running total.
+        """
+        try:
+            cur_cost = self.completion_cost(response)
+        except Exception:
+            cur_cost = 0
+        logger.info(
+            'Cost: %.2f USD | Accumulated Cost: %.2f USD',
+            cur_cost,
+            self.metrics.accumulated_cost,
+        )
+
     def get_token_count(self, messages):
         """
         Get the number of tokens in a list of messages.
@@ -231,6 +259,7 @@ class LLM:
     def completion_cost(self, response):
         """
         Calculate the cost of a completion response based on the model.  Local models are treated as free.
+        The computed cost is also recorded in metrics via add_cost().
 
         Args:
             response (list): A response from a model invocation.
@@ -241,6 +270,7 @@ class LLM:
         if not self.is_local():
             try:
                 cost = litellm_completion_cost(completion_response=response)
+                self.metrics.add_cost(cost)
                 return cost
             except Exception:
                 logger.warning('Cost calculation not supported for this model.')
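
A rough sketch of the intended flow (the model name and message are placeholders; actual costs depend on litellm's pricing table and whether the model is local):

    from opendevin.llm.llm import LLM

    llm = LLM(model='gpt-3.5-turbo')   # hypothetical model; unset options fall back to config
    resp = llm.do_completion(messages=[{'role': 'user', 'content': 'hello'}])

    # do_completion() calls the underlying litellm completion, then post_completion(),
    # which calls completion_cost(); that both logs the per-call cost and records it
    # in the shared Metrics object via metrics.add_cost().
    print(llm.metrics.accumulated_cost)  # running USD total across all calls on this LLM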

+ 1 - 1
opendevin/memory/condenser.py

@@ -16,7 +16,7 @@ class MemoryCondenser:
 
         try:
             messages = [{'content': summarize_prompt, 'role': 'user'}]
-            resp = llm.completion(messages=messages)
+            resp = llm.do_completion(messages=messages)
             summary_response = resp['choices'][0]['message']['content']
             return summary_response
         except Exception as e:

+ 6 - 6
tests/unit/test_micro_agents.py

@@ -31,7 +31,7 @@ def test_coder_agent_with_summary():
     """
     mock_llm = MagicMock()
     content = json.dumps({'action': 'finish', 'args': {}})
-    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
+    mock_llm.do_completion.return_value = {'choices': [{'message': {'content': content}}]}
 
     coder_agent = Agent.get_cls('CoderAgent')(llm=mock_llm)
     assert coder_agent is not None
@@ -43,8 +43,8 @@ def test_coder_agent_with_summary():
     state = State(history=history, inputs={'summary': summary})
     coder_agent.step(state)
 
-    mock_llm.completion.assert_called_once()
-    _, kwargs = mock_llm.completion.call_args
+    mock_llm.do_completion.assert_called_once()
+    _, kwargs = mock_llm.do_completion.call_args
     prompt = kwargs['messages'][0]['content']
     assert task in prompt
     assert "Here's a summary of the codebase, as it relates to this task" in prompt
@@ -58,7 +58,7 @@ def test_coder_agent_without_summary():
     """
     mock_llm = MagicMock()
     content = json.dumps({'action': 'finish', 'args': {}})
-    mock_llm.completion.return_value = {'choices': [{'message': {'content': content}}]}
+    mock_llm.do_completion.return_value = {'choices': [{'message': {'content': content}}]}
 
     coder_agent = Agent.get_cls('CoderAgent')(llm=mock_llm)
     assert coder_agent is not None
@@ -69,8 +69,8 @@ def test_coder_agent_without_summary():
     state = State(history=history)
     coder_agent.step(state)
 
-    mock_llm.completion.assert_called_once()
-    _, kwargs = mock_llm.completion.call_args
+    mock_llm.do_completion.assert_called_once()
+    _, kwargs = mock_llm.do_completion.call_args
     prompt = kwargs['messages'][0]['content']
     assert task in prompt
     assert "Here's a summary of the codebase, as it relates to this task" not in prompt
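
For completeness, a hypothetical pytest-style unit test for the Metrics class itself (not part of this commit), in the same spirit as the tests above:

    import pytest

    from opendevin.core.metrics import Metrics

    def test_metrics_accumulates_costs():
        metrics = Metrics()
        metrics.add_cost(0.5)
        metrics.add_cost(0.25)
        assert metrics.accumulated_cost == 0.75   # 0.5 and 0.25 sum exactly in binary
        assert metrics.costs == [0.5, 0.25]
        assert metrics.get() == {'accumulated_cost': 0.75, 'costs': [0.5, 0.25]}

    def test_metrics_rejects_negative_cost():
        metrics = Metrics()
        with pytest.raises(ValueError):
            metrics.add_cost(-1.0)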