fix: use actual detected room in mine summary stats

process_file() now returns (drawer_count, room) instead of just
drawer_count; its early-exit paths (file already mined, read error,
content shorter than MIN_CHUNK_SIZE) return (0, None). The mine
summary uses the returned room directly instead of re-calling
detect_room() with empty content, which produced wrong stats when
routing relied on content keywords.
This commit is contained in:
adv3nt3
2026-04-08 00:57:58 +02:00
parent 71736a3f4f
commit 75eb7ff871
+8 -9
View File
@@ -451,29 +451,29 @@ def process_file(
rooms: list, rooms: list,
agent: str, agent: str,
dry_run: bool, dry_run: bool,
) -> int: ) -> tuple:
"""Read, chunk, route, and file one file. Returns drawer count.""" """Read, chunk, route, and file one file. Returns (drawer_count, room_name)."""
# Skip if already filed # Skip if already filed
source_file = str(filepath) source_file = str(filepath)
if not dry_run and file_already_mined(collection, source_file): if not dry_run and file_already_mined(collection, source_file):
return 0 return 0, None
try: try:
content = filepath.read_text(encoding="utf-8", errors="replace") content = filepath.read_text(encoding="utf-8", errors="replace")
except OSError: except OSError:
return 0 return 0, None
content = content.strip() content = content.strip()
if len(content) < MIN_CHUNK_SIZE: if len(content) < MIN_CHUNK_SIZE:
return 0 return 0, None
room = detect_room(filepath, content, rooms, project_path) room = detect_room(filepath, content, rooms, project_path)
chunks = chunk_text(content, source_file) chunks = chunk_text(content, source_file)
if dry_run: if dry_run:
print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)") print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)")
return len(chunks) return len(chunks), room
drawers_added = 0 drawers_added = 0
for chunk in chunks: for chunk in chunks:
@@ -489,7 +489,7 @@ def process_file(
if added: if added:
drawers_added += 1 drawers_added += 1
return drawers_added return drawers_added, room
# ============================================================================= # =============================================================================
@@ -608,7 +608,7 @@ def mine(
room_counts = defaultdict(int) room_counts = defaultdict(int)
for i, filepath in enumerate(files, 1): for i, filepath in enumerate(files, 1):
drawers = process_file( drawers, room = process_file(
filepath=filepath, filepath=filepath,
project_path=project_path, project_path=project_path,
collection=collection, collection=collection,
@@ -621,7 +621,6 @@ def mine(
files_skipped += 1 files_skipped += 1
else: else:
total_drawers += drawers total_drawers += drawers
room = detect_room(filepath, "", rooms, project_path)
room_counts[room] += 1 room_counts[room] += 1
if not dry_run: if not dry_run:
print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}") print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{drawers}")