Răsfoiți Sursa

[eval,fix]: metrics get carried across eval instances (#3072)

* fix: make max_budget_per_task optional in `run_agent_controller`

* update arg for each run infer

* fix: metrics were being carried over between runs; reset the LLM's metrics whenever the agent is reset

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
Xingyao Wang 1 an în urmă
părinte
comite
41a8bb3cf1

+ 3 - 0
opendevin/controller/agent.py

@@ -57,6 +57,9 @@ class Agent(ABC):
         # TODO clear history
         # TODO clear history
         self._complete = False
         self._complete = False
 
 
+        if self.llm:
+            self.llm.reset()
+
     @property
     @property
     def name(self):
     def name(self):
         return self.__class__.__name__
         return self.__class__.__name__

+ 2 - 2
opendevin/controller/state/state.py

@@ -97,9 +97,9 @@ class State:
     resume_state: AgentState | None = None
     resume_state: AgentState | None = None
     traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
     traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
     # global metrics for the current task
     # global metrics for the current task
-    metrics: Metrics = Metrics()
+    metrics: Metrics = field(default_factory=Metrics)
     # local metrics for the current subtask
     # local metrics for the current subtask
-    local_metrics: Metrics = Metrics()
+    local_metrics: Metrics = field(default_factory=Metrics)
     # root agent has level 0, and every delegate increases the level by one
     # root agent has level 0, and every delegate increases the level by one
     delegate_level: int = 0
     delegate_level: int = 0
     # start_id and end_id track the range of events in history
     # start_id and end_id track the range of events in history

+ 3 - 0
opendevin/llm/llm.py

@@ -252,3 +252,6 @@ class LLM:
 
 
     def __repr__(self):
     def __repr__(self):
         return str(self)
         return str(self)
+
+    def reset(self):
+        self.metrics = Metrics()