
Doc: Document difference between agents (#722)

* doc: Guide for using local LLM with Ollama

* forgot to delete print statement

* typos

* Updated guide - new working method

* Move to docs folder

* Fixed front end overwrite local model name

* Update llm.py

* Delete docs/examples/images/example.png

deleted example.png

* Documentation of agent differences

* rename examples to documentation

* Docstrings for all agents

* typo fix

* typo fixes

* Typo fixes

* more typo fixes

* typo fix

* typo fixes

* typos fixed

* Typo fixes

* top 10 list

* typo fix

* typo fix

* typos to the moon

* typos fixed

* typo fix

* typo fix

* anotha one

* The rest of the typos

* Corrected agent descriptions

* Agents markdown updated

---------

Co-authored-by: Robert Brennan <accounts@rbren.io>
Jack Quimby 1 year ago
parent
commit d6128941b7

+ 23 - 5
agenthub/codeact_agent/codeact_agent.py

@@ -45,7 +45,8 @@ print(math.pi)" > math.py
 </execute>
 {COMMAND_SEGMENT}
 
-When you are done, execute "exit" to close the shell and end the conversation.
+When you are done, execute the following to close the shell and end the conversation:
+<execute>exit</execute> 
 """
 
 INVALID_INPUT_MESSAGE = (
@@ -54,15 +55,18 @@ INVALID_INPUT_MESSAGE = (
     "If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
 )
 
-
 def parse_response(response) -> str:
     action = response.choices[0].message.content
     if "<execute>" in action and "</execute>" not in action:
         action += "</execute>"
     return action
 
-
 class CodeActAgent(Agent):
+    """
+    The Code Act Agent is a minimalist agent.
+    The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
+    """
+    
     def __init__(
         self,
         llm: LLM,
@@ -71,13 +75,27 @@ class CodeActAgent(Agent):
         Initializes a new instance of the CodeActAgent class.
 
         Parameters:
-        - instruction (str): The instruction for the agent to execute.
-        - max_steps (int): The maximum number of steps to run the agent.
+        - llm (LLM): The llm to be used by this agent
         """
         super().__init__(llm)
         self.messages: List[Mapping[str, str]] = []
 
     def step(self, state: State) -> Action:
+        """
+        Performs one step using the Code Act Agent. 
+        This includes gathering info on previous steps and prompting the model to make a command to execute.
+
+        Parameters:
+        - state (State): used to get updated info and background commands
+
+        Returns:
+        - CmdRunAction(command) - command action to run
+        - AgentEchoAction(content=INVALID_INPUT_MESSAGE) - invalid command output
+
+        Raises:
+        - NotImplementedError - for actions other than CmdOutputObservation or AgentMessageObservation
+        """
+
         if len(self.messages) == 0:
             assert state.plan.main_goal, "Expecting instruction to be set"
             self.messages = [

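The unterminated-tag fix in `parse_response` above can be sketched as a standalone helper (the real function first unwraps `response.choices[0].message.content` before applying it):

```python
def close_execute_tag(action: str) -> str:
    # If the model opened an <execute> block but never closed it,
    # append the closing tag so downstream parsing succeeds.
    if "<execute>" in action and "</execute>" not in action:
        action += "</execute>"
    return action
```

For example, `close_execute_tag("<execute>ls")` yields `"<execute>ls</execute>"`, while already well-formed or tag-free responses pass through unchanged.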
+ 51 - 1
agenthub/monologue_agent/agent.py

@@ -77,14 +77,34 @@ INITIAL_THOUGHTS = [
 
 
 class MonologueAgent(Agent):
+    """
+    The Monologue Agent utilizes long and short term memory to complete tasks.
+    Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
+    Short term memory is stored as a Monologue object and the model can condense it as necessary.
+    """
+
     _initialized = False
 
     def __init__(self, llm: LLM):
+        """
+        Initializes the Monologue Agent with an llm, monologue, and memory.
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        """
         super().__init__(llm)
         self.monologue = Monologue()
         self.memory = LongTermMemory()
 
     def _add_event(self, event: dict):
+        """
+        Adds a new event to the agent's monologue and memory. 
+        Monologue automatically condenses when it gets too large.
+
+        Parameters:
+        - event (dict): The event that will be added to monologue and memory
+        """
+
         if "extras" in event and "screenshot" in event["extras"]:
             del event["extras"]["screenshot"]
         if 'args' in event and 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
@@ -95,7 +115,18 @@ class MonologueAgent(Agent):
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
             self.monologue.condense(self.llm)
 
-    def _initialize(self, task):
+    def _initialize(self, task: str):
+        """
+        Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities and how to navigate the /workspace.
+        Short-circuits and returns immediately when already initialized.
+
+        Parameters:
+        - task (str): The initial goal statement provided by the user 
+
+        Raises:
+        - ValueError: If task is not provided
+        """
+
         if self._initialized:
             return
 
@@ -148,6 +179,15 @@ class MonologueAgent(Agent):
         self._initialized = True
 
     def step(self, state: State) -> Action:
+        """
+        Modifies the current state by adding the most recent actions and observations, then prompts the model to think about its next action using the monologue, memory, and hint.
+
+        Parameters:
+        - state (State): The current state based on previous steps taken
+
+        Returns:
+        - Action: The next action to take based on LLM response
+        """
         self._initialize(state.plan.main_goal)
         for prev_action, obs in state.updated_info:
             self._add_event(prev_action.to_dict())
@@ -168,5 +208,15 @@ class MonologueAgent(Agent):
         return action
 
     def search_memory(self, query: str) -> List[str]:
+        """
+        Uses VectorIndexRetriever to find related memories within the long term memory.
+        Uses search to produce top 10 results.
+        
+        Parameters:
+        - query (str): The query that we want to find related memories for
+
+        Returns:
+        - List[str]: A list of top 10 text results that matched the query
+        """
         return self.memory.search(query)
 
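The filtering that `_add_event` applies before storing an event can be sketched standalone. The cap value and the `"..."` truncation suffix below are assumptions for illustration; the real constant is defined alongside the agent:

```python
MAX_OUTPUT_LENGTH = 5000  # assumed cap; the real value lives in the agent module

def sanitize_event(event: dict) -> dict:
    # Drop bulky screenshot data and truncate oversized command output
    # before the event enters the monologue and long term memory.
    if "extras" in event and "screenshot" in event["extras"]:
        del event["extras"]["screenshot"]
    if "args" in event and "output" in event["args"] and len(event["args"]["output"]) > MAX_OUTPUT_LENGTH:
        event["args"]["output"] = event["args"]["output"][:MAX_OUTPUT_LENGTH] + "..."
    return event
```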

+ 16 - 0
agenthub/monologue_agent/utils/json.py

@@ -2,13 +2,29 @@ import json
 from json_repair import repair_json
 
 def my_encoder(obj):
+    """
+    Encodes objects as dictionaries
+
+    Parameters:
+    - obj (Object): An object that will be converted
+
+    Returns:
+    - dict: If the object can be converted it is returned in dict format
+    """
     if hasattr(obj, "to_dict"):
         return obj.to_dict()
 
 def dumps(obj, **kwargs):
+    """
+    Serialize an object to a JSON string, using my_encoder for non-native objects
+    """
+
     return json.dumps(obj, default=my_encoder, **kwargs)
 
 def loads(s, **kwargs):
+    """
+    Deserialize a JSON string, repairing malformed JSON first
+    """
     s_repaired = repair_json(s)
     return json.loads(s_repaired, **kwargs)
 
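The `default=my_encoder` hook lets `json.dumps` serialize any object that exposes a `to_dict` method. A minimal usage sketch; the `Thought` class here is hypothetical:

```python
import json

class Thought:
    """Hypothetical example object exposing a to_dict method."""
    def __init__(self, text: str):
        self.text = text
    def to_dict(self) -> dict:
        return {"text": self.text}

def my_encoder(obj):
    # json.dumps calls this for objects it cannot serialize natively.
    if hasattr(obj, "to_dict"):
        return obj.to_dict()

print(json.dumps({"thought": Thought("check the logs")}, default=my_encoder))
# → {"thought": {"text": "check the logs"}}
```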

+ 26 - 2
agenthub/monologue_agent/utils/memory.py

@@ -41,14 +41,28 @@ else:
 
 
 class LongTermMemory:
+    """
+    Responsible for storing information that the agent can call on later for better insights and context.
+    Uses chromadb to store and search through memories.
+    """
+
     def __init__(self):
+        """
+        Initialize the chromadb and set up ChromaVectorStore for later use.
+        """
         db = chromadb.Client()
         self.collection = db.get_or_create_collection(name="memories")
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
         self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
         self.thought_idx = 0
 
-    def add_event(self, event):
+    def add_event(self, event: dict):
+        """
+        Adds a new event to the long term memory with a unique id.
+
+        Parameters:
+        - event (dict): The new event to be added to memory
+        """
         id = ""
         t = ""
         if "action" in event:
@@ -69,7 +83,17 @@ class LongTermMemory:
         self.thought_idx += 1
         self.index.insert(doc)
 
-    def search(self, query, k=10):
+    def search(self, query: str, k: int=10):
+        """
+        Searches through the current memory using VectorIndexRetriever
+
+        Parameters:
+        - query (str): A query to match search results to
+        - k (int): Number of top results to return
+
+        Returns:
+        - List[str]: List of top k results found in current memory
+        """
         retriever = VectorIndexRetriever(
             index=self.index,
             similarity_top_k=k,

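The real `search` delegates to chromadb through llama-index's `VectorIndexRetriever`. As a rough illustration of the top-k ranking idea only, here is a toy standalone version that substitutes a bag-of-words cosine similarity for real embeddings:

```python
from collections import Counter
import math

def embed(text: str) -> Counter:
    # Toy bag-of-words "embedding"; the real code uses a sentence-embedding model.
    return Counter(text.lower().split())

def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[w] * b[w] for w in a)
    na = math.sqrt(sum(v * v for v in a.values()))
    nb = math.sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0

def search(memories: list, query: str, k: int = 10) -> list:
    # Rank stored memory texts by similarity to the query; return the top k.
    q = embed(query)
    ranked = sorted(memories, key=lambda m: cosine(embed(m), q), reverse=True)
    return ranked[:k]
```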
+ 42 - 3
agenthub/monologue_agent/utils/monologue.py

@@ -1,21 +1,50 @@
 import traceback
-
+from opendevin.llm.llm import LLM
 import agenthub.monologue_agent.utils.json as json
 import agenthub.monologue_agent.utils.prompts as prompts
 
 class Monologue:
+    """
+    The monologue is a representation of the agent's internal thought process.
+    The agent can use the monologue however it needs to.
+    """
+
     def __init__(self):
+        """
+        Initialize the empty list of thoughts
+        """
         self.thoughts = []
 
     def add_event(self, t: dict):
+        """
+        Adds an event to memory if it is a valid event.
+
+        Parameters:
+        - t (dict): The thought that we want to add to memory
+
+        Raises:
+        - ValueError: If t is not a dict
+        """
         if not isinstance(t, dict):
             raise ValueError("Event must be a dictionary")
         self.thoughts.append(t)
 
     def get_thoughts(self):
+        """
+        Get the current thoughts of the agent.
+
+        Returns:
+        - List: The list of thoughts that the agent has.
+        """
         return self.thoughts
 
     def get_total_length(self):
+        """
+        Gives the total number of characters in all thoughts
+
+        Returns:
+        - Int: Total number of chars in thoughts.
+        """
         total_length = 0
         for t in self.thoughts:
             try:
@@ -24,7 +53,17 @@ class Monologue:
                 print(f"Error serializing thought: {e}")
         return total_length
 
-    def condense(self, llm):
+    def condense(self, llm: LLM):
+        """
+        Attempts to condense the monologue by using the llm
+
+        Parameters:
+        - llm (LLM): llm to be used for summarization
+
+        Raises:
+    - RuntimeError: When the condensing process fails for any reason
+        """
+        
         try:
             prompt = prompts.get_summarize_monologue_prompt(self.thoughts)
             messages = [{"content": prompt,"role": "user"}]
@@ -37,4 +76,4 @@ class Monologue:
 
 def strip_markdown(markdown_json):
     # remove markdown code block
-    return markdown_json.replace('```json\n', '').replace('```', '').strip()
+    return markdown_json.replace('```json\n', '').replace('```', '').strip()

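`get_total_length` measures the monologue by serialized size rather than by thought count, which is what makes the comparison against `MAX_MONOLOGUE_LENGTH` meaningful. A self-contained sketch of that measurement:

```python
import json

def get_total_length(thoughts: list) -> int:
    # Sum the JSON-serialized length of every thought; thoughts that
    # fail to serialize are reported and skipped.
    total_length = 0
    for t in thoughts:
        try:
            total_length += len(json.dumps(t))
        except TypeError as e:
            print(f"Error serializing thought: {e}")
    return total_length
```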
+ 38 - 3
agenthub/monologue_agent/utils/prompts.py

@@ -87,7 +87,13 @@ You can also use the same action and args from the source monologue.
 """
 
 
-def get_summarize_monologue_prompt(thoughts):
+def get_summarize_monologue_prompt(thoughts: List[dict]):
+    """
+    Gets the prompt for summarizing the monologue
+
+    Returns: 
+    - str: A formatted string with the current monologue within the prompt
+    """
     return MONOLOGUE_SUMMARY_PROMPT % {
         'monologue': json.dumps({'old_monologue': thoughts}, indent=2),
     }
@@ -97,6 +103,18 @@ def get_request_action_prompt(
         thoughts: List[dict],
         background_commands_obs: List[CmdOutputObservation] = [],
 ):
+    """
+    Gets the action prompt formatted with appropriate values.
+
+    Parameters:
+    - task (str): The current task the agent is trying to accomplish
+    - thoughts (List[dict]): The agent's current thoughts
+    - background_commands_obs (List[CmdOutputObservation]): List of all observed background commands running
+
+    Returns:
+    - str: Formatted prompt string with hint, task, monologue, and background included
+    """
+
     hint = ''
     if len(thoughts) > 0:
         latest_thought = thoughts[-1]
@@ -115,8 +133,7 @@ def get_request_action_prompt(
         for command_obs in background_commands_obs:
             bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
         bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
-    latest_thought = thoughts[-1]
-
+        
     return ACTION_PROMPT % {
         'task': task,
         'monologue': json.dumps(thoughts, indent=2),
@@ -125,6 +142,15 @@ def get_request_action_prompt(
     }
 
 def parse_action_response(response: str) -> Action:
+    """
+    Parses a string to find an action within it
+
+    Parameters:
+    - response (str): The string to be parsed
+
+    Returns:
+    - Action: The action that was found in the response string
+    """
     json_start = response.find("{")
     json_end = response.rfind("}") + 1
     response = response[json_start:json_end]
@@ -135,5 +161,14 @@ def parse_action_response(response: str) -> Action:
     return action_from_dict(action_dict)
 
 def parse_summary_response(response: str) -> List[dict]:
+    """
+    Parses a summary of the monologue
+
+    Parameters:
+    - response (str): The response string to be parsed
+
+    Returns:
+    - List[dict]: The list of summaries output by the model
+    """
     parsed = json.loads(response)
     return parsed['new_monologue']

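`parse_action_response` pulls JSON out of chatty model output with a first-`{`/last-`}` scan before parsing. That extraction step in isolation, using stdlib `json` in place of the repo's repairing wrapper:

```python
import json

def extract_json(response: str) -> dict:
    # Take the span from the first "{" to the last "}" so surrounding
    # prose or markdown from the model is discarded before parsing.
    json_start = response.find("{")
    json_end = response.rfind("}") + 1
    return json.loads(response[json_start:json_end])
```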
+ 23 - 0
agenthub/planner_agent/agent.py

@@ -8,10 +8,33 @@ from opendevin.state import State
 from opendevin.action import Action
 
 class PlannerAgent(Agent):
+    """
+    The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
+    The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
+    """
+
     def __init__(self, llm: LLM):
+        """
+        Initialize the Planner Agent with an LLM
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        """
         super().__init__(llm)
 
     def step(self, state: State) -> Action:
+        """
+        Checks to see if current step is completed, returns AgentFinishAction if True. 
+        Otherwise, creates a plan prompt and sends it to the model for inference, returning the result as the next action.
+
+        Parameters:
+        - state (State): The current state given the previous actions and observations
+
+        Returns:
+        - AgentFinishAction: If the last state was 'completed', 'verified', or 'abandoned'
+        - Action: The next action to take based on llm response
+        """
+
         if state.plan.task.state in ['completed', 'verified', 'abandoned']:
             return AgentFinishAction()
         prompt = get_prompt(state.plan, state.history)

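The early-exit check at the top of `step` reduces to a small predicate. The sketch below is illustrative only, with the state names taken from the diff:

```python
TERMINAL_STATES = ("completed", "verified", "abandoned")

def is_terminal(task_state: str) -> bool:
    # The planner returns AgentFinishAction as soon as the task
    # reaches any of these terminal states.
    return task_state in TERMINAL_STATES
```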
+ 22 - 1
agenthub/planner_agent/prompt.py

@@ -129,7 +129,19 @@ What is your next thought or action? Again, you must reply with JSON, and only w
 %(hint)s
 """
 
-def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
+def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]) -> str:
+    """
+    Gets the prompt for the planner agent. 
+    Formatted with the most recent action-observation pairs, current task, and hint based on last action
+
+    Parameters:
+    - plan (Plan): The original plan outlined by the user with LLM defined tasks
+    - history (List[Tuple[Action, Observation]]): List of corresponding action-observation pairs
+
+    Returns:
+    - str: The formatted string prompt with historical values
+    """
+
     plan_str = json.dumps(plan.task.to_dict(), indent=2)
     sub_history = history[-HISTORY_SIZE:]
     history_dicts = []
@@ -191,6 +203,15 @@ def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
     }
 
 def parse_response(response: str) -> Action:
+    """
+    Parses the model output to find a valid action to take
+
+    Parameters:
+    - response (str): A response from the model that potentially contains an Action.
+
+    Returns:
+    - Action: A valid next action to perform from model output
+    """
     json_start = response.find("{")
     json_end = response.rfind("}") + 1
     response = response[json_start:json_end]

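`get_prompt` bounds prompt size by keeping only the last `HISTORY_SIZE` action-observation pairs (`sub_history = history[-HISTORY_SIZE:]`). A sketch of that windowing; the constant's value here is an assumption:

```python
HISTORY_SIZE = 10  # assumed; the real value is defined in prompt.py

def recent_history(history: list) -> list:
    # Keep only the newest HISTORY_SIZE pairs so the prompt stays bounded
    # as the interaction grows.
    return history[-HISTORY_SIZE:]
```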
+ 96 - 0
docs/documentation/Agents.md

@@ -0,0 +1,96 @@
+# Agents and Capabilities
+
+## Monologue Agent:
+
+### Description:
+The Monologue Agent utilizes long and short term memory to complete tasks.
+Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
+Short term memory is stored as a Monologue object and the model can condense it as necessary.
+
+### Actions:
+`Action`,
+`NullAction`,
+`CmdRunAction`,
+`FileWriteAction`,
+`FileReadAction`,
+`AgentRecallAction`,
+`BrowseURLAction`,
+`AgentThinkAction`
+
+### Observations:
+`Observation`,
+`NullObservation`,
+`CmdOutputObservation`,
+`FileReadObservation`,
+`AgentRecallObservation`,
+`BrowserOutputObservation`
+
+
+### Methods:
+`__init__`: Initializes the agent with a long term memory, and an internal monologue
+
+`_add_event`: Appends events to the monologue of the agent and condenses with summary automatically if the monologue is too long
+
+`_initialize`: Utilizes the `INITIAL_THOUGHTS` list to give the agent a context for its capabilities and how to navigate the `/workspace`
+
+`step`: Modifies the current state by adding the most recent actions and observations, then prompts the model to think about its next action to take.
+
+`search_memory`: Uses `VectorIndexRetriever` to find related memories within the long term memory.
+
+## Planner Agent:
+
+### Description:
+The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
+The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
+
+### Actions:
+`NullAction`,
+`CmdRunAction`,
+`CmdKillAction`,
+`BrowseURLAction`,
+`FileReadAction`,
+`FileWriteAction`,
+`AgentRecallAction`,
+`AgentThinkAction`,
+`AgentFinishAction`,
+`AgentSummarizeAction`,
+`AddTaskAction`,
+`ModifyTaskAction`
+
+
+### Observations:
+`Observation`,
+`NullObservation`,
+`CmdOutputObservation`,
+`FileReadObservation`,
+`AgentRecallObservation`,
+`BrowserOutputObservation`
+
+### Methods:
+`__init__`: Initializes an agent with `llm`
+
+`step`: Checks to see if current step is completed, returns `AgentFinishAction` if True. Otherwise, creates a plan prompt and sends it to the model for inference, adding the result as the next action.
+
+`search_memory`: Not yet implemented
+
+## CodeAct Agent:
+
+### Description:
+The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
+
+### Actions:
+`Action`,
+`CmdRunAction`,
+`AgentEchoAction`,
+`AgentFinishAction`,
+
+### Observations:
+`CmdOutputObservation`,
+`AgentMessageObservation`,
+
+### Methods:
+`__init__`: Initializes an agent with `llm` and a list of messages `List[Mapping[str, str]]`
+
+`step`: First, gets messages from state and compiles them into a context list. Next, passes the context list with the prompt to get the next command to execute. Finally, executes the command if valid, else returns `AgentEchoAction(INVALID_INPUT_MESSAGE)`
+
+`search_memory`: Not yet implemented

+ 0 - 0
docs/examples/LOCAL_LLM_GUIDE.md → docs/documentation/LOCAL_LLM_GUIDE.md


+ 0 - 0
docs/examples/images/ollama.png → docs/documentation/images/ollama.png