Files
pocketpaw/tests/test_usage_tracker.py
Rohit Kushwaha ab6c26430d chore: fix lint, format, and remove docs/plans
- Fix import sorting in test_usage_tracker.py
- Reformat tool_bridge.py
- Remove docs/plans directory (internal design docs)
2026-03-16 22:08:08 +05:30

210 lines
7.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for usage_tracker.py — UsageTracker fixes.
[FI] Fix: two bugs in UsageTracker:
1. total_tokens excluded cached_input_tokens.
In `record()`, total was computed as `input_tokens + output_tokens`,
silently dropping cached tokens from the count even though they are real
tokens processed by the model.
2. get_summary() called get_records(limit=10_000) instead of reading all
records, so any installation with more than 10 000 lifetime records would
silently produce wrong (understated) aggregation totals.
"""
from __future__ import annotations
import json
import pytest
from pocketpaw.usage_tracker import UsageTracker, _estimate_cost
# ---------------------------------------------------------------------------
# Bug 1: total_tokens must include cached_input_tokens
# ---------------------------------------------------------------------------
class TestTotalTokensIncludesCachedInput:
    """Cached input tokens must be counted in total_tokens with input and output."""

    def test_total_tokens_with_cached(self, tmp_path):
        """A record with cached tokens reports input + output + cached as its total."""
        usage_file = tmp_path / "usage.jsonl"
        entry = UsageTracker(path=usage_file).record(
            backend="anthropic",
            model="claude-3-5-sonnet-20241022",
            input_tokens=100,
            output_tokens=50,
            cached_input_tokens=200,
        )

        assert entry.total_tokens == 100 + 50 + 200

    def test_total_tokens_without_cached(self, tmp_path):
        """With zero cached tokens the total is just input + output."""
        usage_file = tmp_path / "usage.jsonl"
        entry = UsageTracker(path=usage_file).record(
            backend="openai",
            model="gpt-4o",
            input_tokens=80,
            output_tokens=40,
            cached_input_tokens=0,
        )

        assert entry.total_tokens == 80 + 40 + 0

    def test_total_tokens_persisted_correctly(self, tmp_path):
        """The total written to the JSONL file also includes cached tokens."""
        usage_file = tmp_path / "usage.jsonl"
        usage = UsageTracker(path=usage_file)
        usage.record(
            backend="anthropic",
            model="claude-3-5-sonnet-20241022",
            input_tokens=10,
            output_tokens=20,
            cached_input_tokens=30,
        )

        # Exactly one record was written, so the stripped file content is a
        # single JSON document.
        stored = json.loads(usage_file.read_text().strip())

        assert stored["total_tokens"] == 10 + 20 + 30

    def test_summary_total_tokens_includes_cached(self, tmp_path):
        """get_summary() totals include cached tokens across several records."""
        usage = UsageTracker(path=tmp_path / "usage.jsonl")

        # (input, output, cached) for each recorded request, in order.
        requests = ((100, 50, 200), (50, 25, 100))
        for fresh, produced, cached in requests:
            usage.record(
                backend="anthropic",
                model="claude-3-5-sonnet-20241022",
                input_tokens=fresh,
                output_tokens=produced,
                cached_input_tokens=cached,
            )

        totals = usage.get_summary()

        # (100 + 50 + 200) + (50 + 25 + 100) = 350 + 175 = 525
        assert totals["total_tokens"] == 525
        assert totals["total_cached_input_tokens"] == 300
# ---------------------------------------------------------------------------
# Bug 2: get_summary() must aggregate ALL records, not just the last 10 000
# ---------------------------------------------------------------------------
class TestSummaryCoversAllRecords:
    """get_summary() must aggregate every record in the file, with no cap."""

    def _write_n_records(self, path, n: int) -> None:
        """Write n minimal usage records straight to the JSONL file at *path*.

        Each record has 10 input tokens, 5 output tokens and no cached
        tokens; the timestamp day cycles through 01..28 of January 2026.
        """
        rows = [
            json.dumps(
                {
                    "timestamp": f"2026-01-{(i % 28) + 1:02d}T00:00:00+00:00",
                    "backend": "openai",
                    "model": "gpt-4o-mini",
                    "input_tokens": 10,
                    "output_tokens": 5,
                    "cached_input_tokens": 0,
                    "total_tokens": 15,
                    "cost_usd": None,
                    "session_id": "",
                }
            )
            for i in range(n)
        ]
        path.write_text("\n".join(rows) + "\n")

    def test_summary_counts_all_records_beyond_default_limit(self, tmp_path):
        """150 records on disk must yield request_count 150, not 100."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 150)

        totals = UsageTracker(path=usage_file).get_summary()

        assert totals["request_count"] == 150
        assert totals["total_input_tokens"] == 150 * 10

    def test_summary_counts_all_records_beyond_old_hardcoded_limit(self, tmp_path):
        """10_001 records on disk must not be capped at 10_000 in the summary."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 10_001)

        totals = UsageTracker(path=usage_file).get_summary()

        assert totals["request_count"] == 10_001
        assert totals["total_output_tokens"] == 10_001 * 5

    def test_get_records_still_respects_limit(self, tmp_path):
        """get_records(limit=N) keeps returning exactly N of 200 stored records."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 200)
        usage = UsageTracker(path=usage_file)

        for cap in (50, 100):
            assert len(usage.get_records(limit=cap)) == cap

    def test_summary_since_filter_works_with_all_records(self, tmp_path):
        """The `since` filter still excludes older records from the summary."""
        usage_file = tmp_path / "usage.jsonl"

        # One serialized row per era; each is repeated five times below.
        stale_row = json.dumps(
            {
                "timestamp": "2025-01-01T00:00:00+00:00",
                "backend": "anthropic",
                "model": "claude-3-5-sonnet-20241022",
                "input_tokens": 1,
                "output_tokens": 1,
                "cached_input_tokens": 0,
                "total_tokens": 2,
                "cost_usd": None,
                "session_id": "",
            }
        )
        recent_row = json.dumps(
            {
                "timestamp": "2026-03-01T00:00:00+00:00",
                "backend": "anthropic",
                "model": "claude-3-5-sonnet-20241022",
                "input_tokens": 10,
                "output_tokens": 10,
                "cached_input_tokens": 0,
                "total_tokens": 20,
                "cost_usd": None,
                "session_id": "",
            }
        )

        # 5 old rows followed by 5 new rows.
        rows = [stale_row] * 5 + [recent_row] * 5
        usage_file.write_text("\n".join(rows) + "\n")

        usage = UsageTracker(path=usage_file)
        totals = usage.get_summary(since="2026-01-01T00:00:00+00:00")

        # Only the five 2026-03-01 rows (10 input tokens each) pass the filter.
        assert totals["request_count"] == 5
        assert totals["total_input_tokens"] == 50
# ---------------------------------------------------------------------------
# _estimate_cost sanity checks
# ---------------------------------------------------------------------------
class TestEstimateCost:
    """Sanity checks for the _estimate_cost helper."""

    def test_known_model(self):
        """One million gpt-4o-mini input tokens should cost roughly 0.15."""
        estimate = _estimate_cost("gpt-4o-mini", 1_000_000, 0)

        assert estimate == pytest.approx(0.15, rel=1e-3)

    def test_prefix_match(self):
        """A dated model name is expected to fall back to its base model's pricing."""
        # "gpt-4o-2024-11-20" should match "gpt-4o" pricing
        estimate = _estimate_cost("gpt-4o-2024-11-20", 1_000_000, 0)

        assert estimate == pytest.approx(2.50, rel=1e-3)

    def test_unknown_model_returns_none(self):
        """A model name with no pricing entry yields None."""
        estimate = _estimate_cost("unknown-model-xyz", 100, 50)

        assert estimate is None

    def test_cached_input_billed_at_lower_rate(self):
        """Cached input tokens are priced at the cached rate, not the fresh rate."""
        # For claude-3-5-sonnet: input=3.0, cached_input=0.30, output=15.0
        # 0 fresh input, 1M cached, 0 output → 0.30 USD
        estimate = _estimate_cost(
            "claude-3-5-sonnet-20241022",
            0,
            0,
            cached_input_tokens=1_000_000,
        )

        assert estimate == pytest.approx(0.30, rel=1e-3)