fix(init): case-insensitive project dedup across manifest and convo sources
`discover_entities` was deduping the convo_scanner results against the manifest/git scan with a case-sensitive key, while every other dedup path in the pipeline (`_merge_detected`, `miner.add_to_known_entities`) uses case-insensitive matching. As a result, a project named `foo` in a manifest plus `Foo` as a Claude Code `cwd` variant would surface as two review entries instead of collapsing to one. The fix keys `by_name` by `name.lower()`, matching the rest of the pipeline, while leaving each stored entry's own casing untouched: the first-seen (manifest/git) entry is kept unless the convo entry has more `user_commits`, in which case the convo entry replaces it. Flagged by Copilot on #1175. A regression test asserts that a manifest project and a CamelCase-variant convo `cwd` for the same real project collapse to one entry.
This commit is contained in:
@@ -627,13 +627,18 @@ def discover_entities(
|
||||
root_path = Path(project_dir).expanduser().resolve()
|
||||
if is_claude_projects_root(root_path):
|
||||
convo_projects = scan_claude_projects(root_path)
|
||||
# Dedup by name against the git-manifest list, preferring entries with
|
||||
# more user_commits as signal strength.
|
||||
by_name: dict[str, ProjectInfo] = {p.name: p for p in projects}
|
||||
# Dedup by name against the git-manifest list, preferring entries
|
||||
# with more user_commits as signal strength. Keyed case-insensitively
|
||||
# so a `pyproject.toml` name like `mempalace` and a Claude Code
|
||||
# `cwd` variant like `MemPalace` collapse into one entry — matches
|
||||
# the case-insensitive dedup used in `_merge_detected` and
|
||||
# `miner.add_to_known_entities`.
|
||||
by_name: dict[str, ProjectInfo] = {p.name.lower(): p for p in projects}
|
||||
for cp in convo_projects:
|
||||
existing = by_name.get(cp.name)
|
||||
key = cp.name.lower()
|
||||
existing = by_name.get(key)
|
||||
if existing is None or cp.user_commits > existing.user_commits:
|
||||
by_name[cp.name] = cp
|
||||
by_name[key] = cp
|
||||
projects = sorted(
|
||||
by_name.values(),
|
||||
key=lambda p: (not p.is_mine, -p.user_commits, -p.total_commits, p.name),
|
||||
|
||||
@@ -524,6 +524,34 @@ def test_discover_entities_keeps_llm_only_project_uncertain_when_real_signal(tmp
|
||||
assert "Terraform" in [e["name"] for e in d["uncertain"]]
|
||||
|
||||
|
||||
def test_discover_entities_collapses_case_variants_between_manifest_and_convo(tmp_path):
    """Case variants of one project name must yield a single project entry.

    One directory carries both a ``package.json`` naming the project
    ``myproj`` and a session log whose ``cwd`` ends in ``MyProj``. After
    ``discover_entities`` runs on the parent directory, exactly one project
    is expected, and its name must equal ``myproj`` ignoring case.
    """
    projects_root = tmp_path / "projects_root"
    projects_root.mkdir()

    # Manifest side: a git repository whose package.json declares the
    # all-lowercase name `myproj`. The manifest is written before the
    # repository is initialised, as in the original test.
    project_dir = projects_root / "-home-u-src-myproj"
    project_dir.mkdir()
    manifest_text = json.dumps({"name": "myproj"})
    (project_dir / "package.json").write_text(manifest_text)
    _init_git_repo(project_dir)

    # Convo side: a one-record session log in the same directory whose `cwd`
    # points at the CamelCase spelling `MyProj`.
    # NOTE(review): presumably the convo scan derives the project name from
    # this `cwd` -- confirm against the scanner.
    session_record = {"type": "user", "cwd": "/home/u/src/MyProj"}
    (project_dir / "abc.jsonl").write_text(json.dumps(session_record) + "\n")

    discovered = discover_entities(str(projects_root))
    found_names = [entry["name"] for entry in discovered["projects"]]

    # Exactly one entry may survive. The comparison is case-insensitive, so
    # the test does not pin which spelling is kept.
    assert len(found_names) == 1
    assert found_names[0].lower() == "myproj"
|
||||
|
||||
|
||||
# ── _UnionFind basics ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user