# pocketpaw/tests/test_usage_tracker.py

"""Tests for usage_tracker.py — UsageTracker fixes.

[FI] Fix: two bugs in UsageTracker:

1. total_tokens excluded cached_input_tokens.
   In `record()`, total was computed as `input_tokens + output_tokens`,
   silently dropping cached tokens from the count even though they are real
   tokens processed by the model.

2. get_summary() called get_records(limit=10_000) instead of reading all
   records, so any installation with more than 10 000 lifetime records would
   silently produce wrong (understated) aggregation totals.
"""

from __future__ import annotations

import json

import pytest

from pocketpaw.usage_tracker import UsageTracker, _estimate_cost

# ---------------------------------------------------------------------------
# Bug 1 – total_tokens must include cached_input_tokens
# ---------------------------------------------------------------------------


class TestTotalTokensIncludesCachedInput:
    """Assert that ``total_tokens`` is the sum of input, output and cached input tokens."""

    def test_total_tokens_with_cached(self, tmp_path):
        """A record carrying cached input must report all three counts added up."""
        counts = {"input_tokens": 100, "output_tokens": 50, "cached_input_tokens": 200}
        usage = UsageTracker(path=tmp_path / "usage.jsonl")
        entry = usage.record(
            backend="anthropic", model="claude-3-5-sonnet-20241022", **counts
        )
        # 100 + 50 + 200
        assert entry.total_tokens == 350

    def test_total_tokens_without_cached(self, tmp_path):
        """With zero cached input the total is simply input plus output."""
        counts = {"input_tokens": 80, "output_tokens": 40, "cached_input_tokens": 0}
        usage = UsageTracker(path=tmp_path / "usage.jsonl")
        entry = usage.record(backend="openai", model="gpt-4o", **counts)
        # 80 + 40 + 0
        assert entry.total_tokens == 120

    def test_total_tokens_persisted_correctly(self, tmp_path):
        """The JSON line written to disk must carry the same combined total."""
        usage_file = tmp_path / "usage.jsonl"
        usage = UsageTracker(path=usage_file)
        usage.record(
            backend="anthropic",
            model="claude-3-5-sonnet-20241022",
            input_tokens=10,
            output_tokens=20,
            cached_input_tokens=30,
        )
        # Only one record was written, so the stripped file content is one JSON document.
        stored = json.loads(usage_file.read_text().strip())
        # 10 + 20 + 30
        assert stored["total_tokens"] == 60

    def test_summary_total_tokens_includes_cached(self, tmp_path):
        """The summary totals must include cached input from every record."""
        usage = UsageTracker(path=tmp_path / "usage.jsonl")
        # (input, output, cached input) for each recorded call.
        calls = ((100, 50, 200), (50, 25, 100))
        for fresh, generated, cached in calls:
            usage.record(
                backend="anthropic",
                model="claude-3-5-sonnet-20241022",
                input_tokens=fresh,
                output_tokens=generated,
                cached_input_tokens=cached,
            )
        totals = usage.get_summary()
        # (100+50+200) + (50+25+100) = 350 + 175 = 525
        assert totals["total_tokens"] == 525
        assert totals["total_cached_input_tokens"] == 300


# ---------------------------------------------------------------------------
# Bug 2 – get_summary() must aggregate ALL records, not just the last 10 000
# ---------------------------------------------------------------------------


class TestSummaryCoversAllRecords:
    """Assert that ``get_summary()`` aggregates every record found in the usage file."""

    @staticmethod
    def _record_line(timestamp, backend, model, input_tokens, output_tokens):
        """Return one usage record serialized as a JSON string.

        The fixtures in this class never use cached tokens, a cost, or a
        session id, so those fields are fixed and ``total_tokens`` is just
        input plus output.
        """
        return json.dumps(
            {
                "timestamp": timestamp,
                "backend": backend,
                "model": model,
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "cached_input_tokens": 0,
                "total_tokens": input_tokens + output_tokens,
                "cost_usd": None,
                "session_id": "",
            }
        )

    def _write_n_records(self, path, n: int) -> None:
        """Write ``n`` minimal records straight into the JSONL file at ``path``.

        Each record has 10 input and 5 output tokens; the day of month cycles
        through 01..28 so every timestamp is a valid January 2026 date.
        """
        serialized = [
            self._record_line(
                f"2026-01-{day:02d}T00:00:00+00:00", "openai", "gpt-4o-mini", 10, 5
            )
            for day in (index % 28 + 1 for index in range(n))
        ]
        path.write_text("\n".join(serialized) + "\n")

    def test_summary_counts_all_records_beyond_default_limit(self, tmp_path):
        """With 150 records, summary request_count must be 150, not 100."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 150)
        totals = UsageTracker(path=usage_file).get_summary()
        assert totals["request_count"] == 150
        assert totals["total_input_tokens"] == 150 * 10

    def test_summary_counts_all_records_beyond_old_hardcoded_limit(self, tmp_path):
        """With 10_001 records, summary must not cap at 10_000."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 10_001)
        totals = UsageTracker(path=usage_file).get_summary()
        assert totals["request_count"] == 10_001
        assert totals["total_output_tokens"] == 10_001 * 5

    def test_get_records_still_respects_limit(self, tmp_path):
        """get_records(limit=N) is unaffected — it should still cap at N."""
        usage_file = tmp_path / "usage.jsonl"
        self._write_n_records(usage_file, 200)
        usage = UsageTracker(path=usage_file)
        for cap in (50, 100):
            assert len(usage.get_records(limit=cap)) == cap

    def test_summary_since_filter_works_with_all_records(self, tmp_path):
        """The `since` filter must still work when all records are scanned."""
        usage_file = tmp_path / "usage.jsonl"
        sonnet = "claude-3-5-sonnet-20241022"
        # Five records dated before the cutoff, then five dated after it.
        stale = [self._record_line("2025-01-01T00:00:00+00:00", "anthropic", sonnet, 1, 1)] * 5
        fresh = [self._record_line("2026-03-01T00:00:00+00:00", "anthropic", sonnet, 10, 10)] * 5
        usage_file.write_text("\n".join(stale + fresh) + "\n")
        totals = UsageTracker(path=usage_file).get_summary(since="2026-01-01T00:00:00+00:00")
        # Only the five post-cutoff records (10 input tokens each) should be counted.
        assert totals["request_count"] == 5
        assert totals["total_input_tokens"] == 50


# ---------------------------------------------------------------------------
# _estimate_cost sanity checks
# ---------------------------------------------------------------------------


class TestEstimateCost:
    """Sanity checks for the values returned by ``_estimate_cost``."""

    def test_known_model(self):
        """One million input tokens on gpt-4o-mini is expected to cost about 0.15 USD."""
        expected_usd = pytest.approx(0.15, rel=1e-3)
        assert _estimate_cost("gpt-4o-mini", 1_000_000, 0) == expected_usd

    def test_prefix_match(self):
        """A dated model name is expected to be priced like its base model."""
        # "gpt-4o-2024-11-20" should match "gpt-4o" pricing
        dated_model = "gpt-4o-2024-11-20"
        expected_usd = pytest.approx(2.50, rel=1e-3)
        assert _estimate_cost(dated_model, 1_000_000, 0) == expected_usd

    def test_unknown_model_returns_none(self):
        """A model name with no pricing entry must yield ``None``, not a number."""
        estimate = _estimate_cost("unknown-model-xyz", 100, 50)
        assert estimate is None

    def test_cached_input_billed_at_lower_rate(self):
        """Cached input tokens are expected to be billed at the cached rate."""
        # For claude-3-5-sonnet: input=3.0, cached_input=0.30, output=15.0
        # 0 fresh input, 1M cached, 0 output → 0.30 USD
        expected_usd = pytest.approx(0.30, rel=1e-3)
        estimate = _estimate_cost(
            "claude-3-5-sonnet-20241022", 0, 0, cached_input_tokens=1_000_000
        )
        assert estimate == expected_usd