mirror of
https://github.com/pocketpaw/pocketpaw.git
synced 2026-05-13 21:21:53 +00:00
- Fix import sorting in test_usage_tracker.py - Reformat tool_bridge.py - Remove docs/plans directory (internal design docs)
210 lines
7.8 KiB
Python
210 lines
7.8 KiB
Python
"""Tests for usage_tracker.py — UsageTracker fixes.
|
||
|
||
[FI] Fix: two bugs in UsageTracker:

1. total_tokens excluded cached_input_tokens.
   In `record()`, total was computed as `input_tokens + output_tokens`,
   silently dropping cached tokens from the count even though they are real
   tokens processed by the model.

2. get_summary() called get_records(limit=10_000) instead of reading all
   records, so any installation with more than 10 000 lifetime records would
   silently produce wrong (understated) aggregation totals.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
|
||
import pytest
|
||
|
||
from pocketpaw.usage_tracker import UsageTracker, _estimate_cost
|
||
|
||
# ---------------------------------------------------------------------------
# Bug 1 – total_tokens must include cached_input_tokens
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestTotalTokensIncludesCachedInput:
    """Regression tests: cached input tokens count toward ``total_tokens``.

    The expected relationship is ``total = input + output + cached_input``.
    It is checked on the object returned by ``record()``, on the line written
    to the usage file, and on the aggregate returned by ``get_summary()``.
    """

    def test_total_tokens_with_cached(self, tmp_path):
        """A record carrying cached tokens reports the three-way sum."""
        usage_file = tmp_path / "usage.jsonl"
        entry = UsageTracker(path=usage_file).record(
            backend="anthropic",
            model="claude-3-5-sonnet-20241022",
            input_tokens=100,
            output_tokens=50,
            cached_input_tokens=200,
        )
        # 100 input + 50 output + 200 cached
        assert entry.total_tokens == 350

    def test_total_tokens_without_cached(self, tmp_path):
        """With zero cached tokens the total is simply input + output."""
        usage_file = tmp_path / "usage.jsonl"
        entry = UsageTracker(path=usage_file).record(
            backend="openai",
            model="gpt-4o",
            input_tokens=80,
            output_tokens=40,
            cached_input_tokens=0,
        )
        # 80 input + 40 output + 0 cached
        assert entry.total_tokens == 120

    def test_total_tokens_persisted_correctly(self, tmp_path):
        """The value written to the usage file includes the cached tokens."""
        usage_file = tmp_path / "usage.jsonl"
        UsageTracker(path=usage_file).record(
            backend="anthropic",
            model="claude-3-5-sonnet-20241022",
            input_tokens=10,
            output_tokens=20,
            cached_input_tokens=30,
        )
        # Only one record was written, so the stripped file content is
        # parsed as a single JSON document.
        persisted = json.loads(usage_file.read_text().strip())
        # 10 input + 20 output + 30 cached
        assert persisted["total_tokens"] == 60

    def test_summary_total_tokens_includes_cached(self, tmp_path):
        """The summary adds up per-record totals that include cached tokens."""
        tracker = UsageTracker(path=tmp_path / "usage.jsonl")
        # (input, output, cached_input) for each recorded request.
        requests = ((100, 50, 200), (50, 25, 100))
        for fresh_in, produced, cached_in in requests:
            tracker.record(
                backend="anthropic",
                model="claude-3-5-sonnet-20241022",
                input_tokens=fresh_in,
                output_tokens=produced,
                cached_input_tokens=cached_in,
            )
        summary = tracker.get_summary()
        # (100 + 50 + 200) + (50 + 25 + 100) = 350 + 175 = 525
        assert summary["total_tokens"] == 525
        assert summary["total_cached_input_tokens"] == 300
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Bug 2 – get_summary() must aggregate ALL records, not just the last 10 000
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestSummaryCoversAllRecords:
    """Regression tests: ``get_summary()`` aggregates every stored record.

    The usage file is seeded by writing JSON lines directly, so large record
    counts can be set up without calling ``record()`` once per entry.
    """

    def _write_n_records(self, path, n: int) -> None:
        """Write ``n`` minimal usage records to ``path``, one JSON object per line.

        Every record has 10 input tokens, 5 output tokens and no cached
        tokens; the day of the timestamp cycles through 01..28 of 2026-01.
        """
        serialized = [
            json.dumps(
                {
                    "timestamp": f"2026-01-{(i % 28) + 1:02d}T00:00:00+00:00",
                    "backend": "openai",
                    "model": "gpt-4o-mini",
                    "input_tokens": 10,
                    "output_tokens": 5,
                    "cached_input_tokens": 0,
                    "total_tokens": 15,
                    "cost_usd": None,
                    "session_id": "",
                }
            )
            for i in range(n)
        ]
        path.write_text("\n".join(serialized) + "\n")

    def test_summary_counts_all_records_beyond_default_limit(self, tmp_path):
        """150 stored records must yield a request_count of 150, not 100."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 150)
        summary = UsageTracker(path=usage_file).get_summary()
        assert summary["request_count"] == 150
        assert summary["total_input_tokens"] == 150 * 10

    def test_summary_counts_all_records_beyond_old_hardcoded_limit(self, tmp_path):
        """10_001 stored records must not be capped at 10_000."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 10_001)
        summary = UsageTracker(path=usage_file).get_summary()
        assert summary["request_count"] == 10_001
        assert summary["total_output_tokens"] == 10_001 * 5

    def test_get_records_still_respects_limit(self, tmp_path):
        """``get_records(limit=N)`` keeps returning exactly N of 200 records."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 200)
        tracker = UsageTracker(path=usage_file)
        for cap in (50, 100):
            assert len(tracker.get_records(limit=cap)) == cap

    def test_summary_since_filter_works_with_all_records(self, tmp_path):
        """Only records at or after ``since`` are counted in the summary."""
        usage_file = tmp_path / "usage.jsonl"

        def make_line(timestamp, per_direction):
            # One serialized record with equal input and output token counts.
            return json.dumps(
                {
                    "timestamp": timestamp,
                    "backend": "anthropic",
                    "model": "claude-3-5-sonnet-20241022",
                    "input_tokens": per_direction,
                    "output_tokens": per_direction,
                    "cached_input_tokens": 0,
                    "total_tokens": 2 * per_direction,
                    "cost_usd": None,
                    "session_id": "",
                }
            )

        # Five records dated before the cutoff, then five dated after it.
        stale = [make_line("2025-01-01T00:00:00+00:00", 1) for _ in range(5)]
        recent = [make_line("2026-03-01T00:00:00+00:00", 10) for _ in range(5)]
        usage_file.write_text("\n".join(stale + recent) + "\n")

        tracker = UsageTracker(path=usage_file)
        summary = tracker.get_summary(since="2026-01-01T00:00:00+00:00")
        # Only the five recent records (10 input tokens each) should remain.
        assert summary["request_count"] == 5
        assert summary["total_input_tokens"] == 50
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# _estimate_cost sanity checks
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestEstimateCost:
    """Sanity checks for the ``_estimate_cost`` pricing helper."""

    def test_known_model(self):
        """A model name used verbatim yields its expected cost."""
        estimate = _estimate_cost("gpt-4o-mini", 1_000_000, 0)
        assert estimate == pytest.approx(0.15, rel=1e-3)

    def test_prefix_match(self):
        """A dated model name falls back to its base model's pricing."""
        # "gpt-4o-2024-11-20" should match "gpt-4o" pricing
        estimate = _estimate_cost("gpt-4o-2024-11-20", 1_000_000, 0)
        assert estimate == pytest.approx(2.50, rel=1e-3)

    def test_unknown_model_returns_none(self):
        """An unrecognised model name produces ``None`` rather than a cost."""
        estimate = _estimate_cost("unknown-model-xyz", 100, 50)
        assert estimate is None

    def test_cached_input_billed_at_lower_rate(self):
        """Cached input tokens are priced at the cached rate."""
        # For claude-3-5-sonnet: input=3.0, cached_input=0.30, output=15.0
        # 0 fresh input, 1M cached, 0 output → 0.30 USD
        estimate = _estimate_cost(
            "claude-3-5-sonnet-20241022",
            0,
            0,
            cached_input_tokens=1_000_000,
        )
        assert estimate == pytest.approx(0.30, rel=1e-3)
|