fix: resolve formatting, regression logic, and pytest defaults

- Run ruff format on all benchmark files (fixes CI lint job)
- Fix check_regression() substring ambiguity: use ordered keyword matching
  so that "latency_improvement_pct" is correctly classified as higher-is-better
- Update stale comments in conftest.py referencing wrong fixture
- Add pytest addopts to skip tests marked benchmark/slow/stress by default
This commit is contained in:
Igor Lins e Silva
2026-04-08 10:56:39 -03:00
parent 7e4db33061
commit ebc26f3960
12 changed files with 383 additions and 138 deletions
+6 -2
View File
@@ -116,7 +116,9 @@ class TestLayer1UnboundedFetch:
record_metric("layer1_filter", "unfiltered_ms", round(unfiltered_ms, 1))
record_metric("layer1_filter", "filtered_ms", round(filtered_ms, 1))
if unfiltered_ms > 0:
-record_metric("layer1_filter", "speedup_pct", round((1 - filtered_ms / unfiltered_ms) * 100, 1))
+record_metric(
+"layer1_filter", "speedup_pct", round((1 - filtered_ms / unfiltered_ms) * 100, 1)
+)
@pytest.mark.benchmark
@@ -146,7 +148,9 @@ class TestWakeUpTokenBudget:
record_metric("wakeup_budget", f"tokens_at_{n_drawers}", token_estimate)
record_metric("wakeup_budget", f"chars_at_{n_drawers}", len(text))
-assert token_estimate < 1200, f"Wake-up exceeded budget: ~{token_estimate} tokens at {n_drawers} drawers"
+assert token_estimate < 1200, (
+f"Wake-up exceeded budget: ~{token_estimate} tokens at {n_drawers} drawers"
+)
@pytest.mark.benchmark