diff --git a/mempalace/project_scanner.py b/mempalace/project_scanner.py index bbdb6f4..741a3e2 100644 --- a/mempalace/project_scanner.py +++ b/mempalace/project_scanner.py @@ -627,13 +627,18 @@ def discover_entities( root_path = Path(project_dir).expanduser().resolve() if is_claude_projects_root(root_path): convo_projects = scan_claude_projects(root_path) - # Dedup by name against the git-manifest list, preferring entries with - # more user_commits as signal strength. - by_name: dict[str, ProjectInfo] = {p.name: p for p in projects} + # Dedup by name against the git-manifest list, preferring entries + # with more user_commits as signal strength. Keyed case-insensitively + # so a `pyproject.toml` name like `mempalace` and a Claude Code + # `cwd` variant like `MemPalace` collapse into one entry — matches + # the case-insensitive dedup used in `_merge_detected` and + # `miner.add_to_known_entities`. + by_name: dict[str, ProjectInfo] = {p.name.lower(): p for p in projects} for cp in convo_projects: - existing = by_name.get(cp.name) + key = cp.name.lower() + existing = by_name.get(key) if existing is None or cp.user_commits > existing.user_commits: - by_name[cp.name] = cp + by_name[key] = cp projects = sorted( by_name.values(), key=lambda p: (not p.is_mine, -p.user_commits, -p.total_commits, p.name), diff --git a/tests/test_project_scanner.py b/tests/test_project_scanner.py index 2dc939a..49126b4 100644 --- a/tests/test_project_scanner.py +++ b/tests/test_project_scanner.py @@ -524,6 +524,34 @@ def test_discover_entities_keeps_llm_only_project_uncertain_when_real_signal(tmp assert "Terraform" in [e["name"] for e in d["uncertain"]] +def test_discover_entities_collapses_case_variants_between_manifest_and_convo(tmp_path): + """A project named `myproj` in a manifest and `MyProj` as a Claude Code + cwd must collapse into one entry. 
Matches the case-insensitive dedup + used by `_merge_detected` and `miner.add_to_known_entities`.""" + root = tmp_path / "projects_root" + root.mkdir() + + # Entry 1: a git+manifest project named lowercase `myproj` + repo = root / "-home-u-src-myproj" + repo.mkdir() + (repo / "package.json").write_text(json.dumps({"name": "myproj"})) + _init_git_repo(repo) + + # Entry 2: same root ALSO looks like a Claude Code `.claude/projects/` dir; + # the convo_scanner inside will resolve `cwd` to `/home/u/src/MyProj` + # (CamelCase variant of the same project). + session = repo / "abc.jsonl" + session.write_text(json.dumps({"type": "user", "cwd": "/home/u/src/MyProj"}) + "\n") + + d = discover_entities(str(root)) + + project_names = [e["name"] for e in d["projects"]] + # One entry, not two. The manifest's "myproj" is seeded first and survives unless + # the convo entry has strictly more user_commits, so compare names lowercased. + assert len(project_names) == 1 + assert project_names[0].lower() == "myproj" + + # ── _UnionFind basics ──────────────────────────────────────────────────