|
@@ -1,10 +1,7 @@
|
|
|
from enum import Enum
|
|
from enum import Enum
|
|
|
-from typing import Union
|
|
|
|
|
|
|
+from typing import Literal
|
|
|
|
|
|
|
|
from pydantic import BaseModel, Field, model_serializer
|
|
from pydantic import BaseModel, Field, model_serializer
|
|
|
-from typing_extensions import Literal
|
|
|
|
|
-
|
|
|
|
|
-from openhands.core.logger import openhands_logger as logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ContentType(Enum):
|
|
class ContentType(Enum):
|
|
@@ -60,60 +57,24 @@ class Message(BaseModel):
|
|
|
|
|
|
|
|
@model_serializer
|
|
@model_serializer
|
|
|
def serialize_model(self) -> dict:
|
|
def serialize_model(self) -> dict:
|
|
|
- content: list[dict[str, str | dict[str, str]]] = []
|
|
|
|
|
-
|
|
|
|
|
- for item in self.content:
|
|
|
|
|
- if isinstance(item, TextContent):
|
|
|
|
|
- content.append(item.model_dump())
|
|
|
|
|
- elif isinstance(item, ImageContent):
|
|
|
|
|
- content.extend(item.model_dump())
|
|
|
|
|
-
|
|
|
|
|
- return {'content': content, 'role': self.role}
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def format_messages(
|
|
|
|
|
- messages: Union[Message, list[Message]],
|
|
|
|
|
- with_images: bool,
|
|
|
|
|
- with_prompt_caching: bool,
|
|
|
|
|
-) -> list[dict]:
|
|
|
|
|
- if not isinstance(messages, list):
|
|
|
|
|
- messages = [messages]
|
|
|
|
|
-
|
|
|
|
|
- if with_images or with_prompt_caching:
|
|
|
|
|
- return [message.model_dump() for message in messages]
|
|
|
|
|
-
|
|
|
|
|
- converted_messages = []
|
|
|
|
|
- for message in messages:
|
|
|
|
|
- content_parts = []
|
|
|
|
|
- role = 'user'
|
|
|
|
|
-
|
|
|
|
|
- if isinstance(message, str) and message:
|
|
|
|
|
- content_parts.append(message)
|
|
|
|
|
- elif isinstance(message, dict):
|
|
|
|
|
- role = message.get('role', 'user')
|
|
|
|
|
- if 'content' in message and message['content']:
|
|
|
|
|
- content_parts.append(message['content'])
|
|
|
|
|
- elif isinstance(message, Message):
|
|
|
|
|
- role = message.role
|
|
|
|
|
- for content in message.content:
|
|
|
|
|
- if isinstance(content, list):
|
|
|
|
|
- for item in content:
|
|
|
|
|
- if isinstance(item, TextContent) and item.text:
|
|
|
|
|
- content_parts.append(item.text)
|
|
|
|
|
- elif isinstance(content, TextContent) and content.text:
|
|
|
|
|
- content_parts.append(content.text)
|
|
|
|
|
- else:
|
|
|
|
|
- logger.error(
|
|
|
|
|
- f'>>> `message` is not a string, dict, or Message: {type(message)}'
|
|
|
|
|
|
|
+ content: list[dict] | str
|
|
|
|
|
+ if self.role == 'system':
|
|
|
|
|
+ # For system role, concatenate all text content into a single string
|
|
|
|
|
+ content = '\n'.join(
|
|
|
|
|
+ item.text for item in self.content if isinstance(item, TextContent)
|
|
|
)
|
|
)
|
|
|
-
|
|
|
|
|
- if content_parts:
|
|
|
|
|
- content_str = '\n'.join(content_parts)
|
|
|
|
|
- converted_messages.append(
|
|
|
|
|
- {
|
|
|
|
|
- 'role': role,
|
|
|
|
|
- 'content': content_str,
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ elif self.role == 'assistant' and not self.contains_image:
|
|
|
|
|
+ # For assistant role without vision, concatenate all text content into a single string
|
|
|
|
|
+ content = '\n'.join(
|
|
|
|
|
+ item.text for item in self.content if isinstance(item, TextContent)
|
|
|
)
|
|
)
|
|
|
|
|
+ else:
|
|
|
|
|
+ # For user role or assistant role with vision enabled, serialize each content item
|
|
|
|
|
+ content = []
|
|
|
|
|
+ for item in self.content:
|
|
|
|
|
+ if isinstance(item, TextContent):
|
|
|
|
|
+ content.append(item.model_dump())
|
|
|
|
|
+ elif isinstance(item, ImageContent):
|
|
|
|
|
+ content.extend(item.model_dump())
|
|
|
|
|
|
|
|
- return converted_messages
|
|
|
|
|
|
|
+ return {'content': content, 'role': self.role}
|