fix: resolve formatting, regression logic, and pytest defaults

- Run ruff format on all benchmark files (fixes CI lint job)
- Fix check_regression() substring ambiguity: use ordered keyword matching
  so "latency_improvement_pct" is correctly classified as higher-is-better
- Update stale comments in conftest.py that referenced the wrong fixture
- Add pytest addopts to skip tests marked benchmark/slow/stress by default
This commit is contained in:
Igor Lins e Silva
2026-04-08 10:56:39 -03:00
parent 7e4db33061
commit ebc26f3960
12 changed files with 383 additions and 138 deletions
+17 -8
View File
@@ -77,9 +77,7 @@ class TestSearchRecallAtScale:
total_needle_queries = min(10, len(needle_info))
for needle in needle_info[:total_needle_queries]:
result = search_memories(
needle["query"], palace_path=palace_path, n_results=10
)
result = search_memories(needle["query"], palace_path=palace_path, n_results=10)
if "error" in result:
continue
@@ -150,8 +148,12 @@ class TestSearchFilteredVsUnfiltered:
record_metric("search_filter", "avg_unfiltered_ms", round(avg_unfiltered, 1))
record_metric("search_filter", "avg_filtered_ms", round(avg_filtered, 1))
record_metric("search_filter", "latency_improvement_pct", round(latency_improvement, 1))
record_metric("search_filter", "unfiltered_recall_at_5", round(unfiltered_hits / max(n_queries, 1), 3))
record_metric("search_filter", "filtered_recall_at_5", round(filtered_hits / max(n_queries, 1), 3))
record_metric(
"search_filter", "unfiltered_recall_at_5", round(unfiltered_hits / max(n_queries, 1), 3)
)
record_metric(
"search_filter", "filtered_recall_at_5", round(filtered_hits / max(n_queries, 1), 3)
)
@pytest.mark.benchmark
@@ -167,9 +169,16 @@ class TestConcurrentSearch:
from mempalace.searcher import search_memories
queries = [
"authentication", "database", "deployment", "error handling",
"testing", "monitoring", "caching", "middleware",
"serialization", "validation",
"authentication",
"database",
"deployment",
"error handling",
"testing",
"monitoring",
"caching",
"middleware",
"serialization",
"validation",
] * 3 # 30 total queries
def run_search(query):