metrics.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. import time
  2. from pydantic import BaseModel, Field
  3. class Cost(BaseModel):
  4. model: str
  5. cost: float
  6. timestamp: float = Field(default_factory=time.time)
  7. class ResponseLatency(BaseModel):
  8. """Metric tracking the round-trip time per completion call."""
  9. model: str
  10. latency: float
  11. response_id: str
  12. class Metrics:
  13. """Metrics class can record various metrics during running and evaluation.
  14. Currently, we define the following metrics:
  15. accumulated_cost: the total cost (USD $) of the current LLM.
  16. response_latency: the time taken for each LLM completion call.
  17. """
  18. def __init__(self, model_name: str = 'default') -> None:
  19. self._accumulated_cost: float = 0.0
  20. self._costs: list[Cost] = []
  21. self._response_latencies: list[ResponseLatency] = []
  22. self.model_name = model_name
  23. @property
  24. def accumulated_cost(self) -> float:
  25. return self._accumulated_cost
  26. @accumulated_cost.setter
  27. def accumulated_cost(self, value: float) -> None:
  28. if value < 0:
  29. raise ValueError('Total cost cannot be negative.')
  30. self._accumulated_cost = value
  31. @property
  32. def costs(self) -> list[Cost]:
  33. return self._costs
  34. @property
  35. def response_latencies(self) -> list[ResponseLatency]:
  36. if not hasattr(self, '_response_latencies'):
  37. self._response_latencies = []
  38. return self._response_latencies
  39. @response_latencies.setter
  40. def response_latencies(self, value: list[ResponseLatency]) -> None:
  41. self._response_latencies = value
  42. def add_cost(self, value: float) -> None:
  43. if value < 0:
  44. raise ValueError('Added cost cannot be negative.')
  45. self._accumulated_cost += value
  46. self._costs.append(Cost(cost=value, model=self.model_name))
  47. def add_response_latency(self, value: float, response_id: str) -> None:
  48. self._response_latencies.append(
  49. ResponseLatency(
  50. latency=max(0.0, value), model=self.model_name, response_id=response_id
  51. )
  52. )
  53. def merge(self, other: 'Metrics') -> None:
  54. self._accumulated_cost += other.accumulated_cost
  55. self._costs += other._costs
  56. self._response_latencies += other._response_latencies
  57. def get(self) -> dict:
  58. """Return the metrics in a dictionary."""
  59. return {
  60. 'accumulated_cost': self._accumulated_cost,
  61. 'costs': [cost.model_dump() for cost in self._costs],
  62. 'response_latencies': [
  63. latency.model_dump() for latency in self._response_latencies
  64. ],
  65. }
  66. def reset(self):
  67. self._accumulated_cost = 0.0
  68. self._costs = []
  69. self._response_latencies = []
  70. def log(self):
  71. """Log the metrics."""
  72. metrics = self.get()
  73. logs = ''
  74. for key, value in metrics.items():
  75. logs += f'{key}: {value}\n'
  76. return logs
  77. def __repr__(self):
  78. return f'Metrics({self.get()}'