fix: support nested .gitignore rules during mining
This commit is contained in:
+177
-51
@@ -1,82 +1,208 @@
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
import yaml
|
||||
import chromadb
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import chromadb
|
||||
import yaml
|
||||
|
||||
from mempalace.miner import mine, scan_project
|
||||
|
||||
|
||||
def write_file(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating missing parent directories.

    Args:
        path: Destination file. Any missing parent directories are created.
        content: Text to write; an existing file is overwritten.
    """
    # exist_ok=True lets several fixtures share the same parent directory.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def scanned_files(project_root: Path, **kwargs):
    """Run ``scan_project`` on *project_root* and return sorted relative paths.

    Args:
        project_root: Root directory to scan. The returned paths are made
            relative to it, so it must be a prefix of every scanned path.
        **kwargs: Forwarded unchanged to ``scan_project``.

    Returns:
        A sorted list of POSIX-style path strings relative to *project_root*.
    """
    files = scan_project(str(project_root), **kwargs)
    # Sorting and as_posix() keep assertions independent of scan order and OS.
    return sorted(path.relative_to(project_root).as_posix() for path in files)
|
||||
|
||||
|
||||
def test_project_mining():
    """Mine a tiny project and check that the palace collection is not empty."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()
        os.makedirs(project_root / "backend")

        # A small source file for the miner to pick up.
        write_file(
            project_root / "backend" / "app.py", "def main():\n print('hello world')\n" * 20
        )
        # Project config declaring the wing and its rooms.
        with open(project_root / "mempalace.yaml", "w") as f:
            yaml.dump(
                {
                    "wing": "test_project",
                    "rooms": [
                        {"name": "backend", "description": "Backend code"},
                        {"name": "general", "description": "General"},
                    ],
                },
                f,
            )

        palace_path = project_root / "palace"
        mine(str(project_root), str(palace_path))

        # Verify that mining stored at least one entry in the collection.
        client = chromadb.PersistentClient(path=str(palace_path))
        col = client.get_collection("mempalace_drawers")
        assert col.count() > 0
    finally:
        # Remove the temporary tree even when an assertion fails.
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_respects_gitignore():
    """A root .gitignore excludes both a named file and a whole directory."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        # write_file creates parent directories, so no explicit makedirs is needed.
        write_file(project_root / ".gitignore", "ignored.py\ngenerated/\n")
        write_file(project_root / "src" / "app.py", "print('hello')\n" * 20)
        write_file(project_root / "ignored.py", "print('ignore me')\n" * 20)
        write_file(project_root / "generated" / "artifact.py", "print('artifact')\n" * 20)

        # Only the file matched by no ignore rule should be returned.
        assert scanned_files(project_root) == ["src/app.py"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_respects_nested_gitignore():
    """Rules from a nested .gitignore apply together with the root rules."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        # The root ignores *.log; the nested file also ignores tasks/ in subrepo.
        write_file(project_root / ".gitignore", "*.log\n")
        write_file(project_root / "subrepo" / ".gitignore", "tasks/\n")
        write_file(project_root / "subrepo" / "src" / "main.py", "print('main')\n" * 20)
        write_file(project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20)
        write_file(project_root / "subrepo" / "debug.log", "debug\n" * 20)

        # tasks/task.py and debug.log must both be excluded.
        assert scanned_files(project_root) == ["subrepo/src/main.py"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_allows_nested_gitignore_override():
    """A nested ``!keep.csv`` negation re-includes a file the root rule ignores."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".gitignore", "*.csv\n")
        write_file(project_root / "subrepo" / ".gitignore", "!keep.csv\n")
        write_file(project_root / "drop.csv", "a,b,c\n" * 20)
        write_file(project_root / "subrepo" / "keep.csv", "a,b,c\n" * 20)

        # drop.csv stays ignored by the root rule; only the negated file is left.
        assert scanned_files(project_root) == ["subrepo/keep.csv"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_allows_gitignore_negation_when_parent_dir_is_visible():
    """``generated/*`` plus ``!generated/keep.py`` leaves only keep.py visible."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        # The pattern ignores the directory's contents, not the directory itself,
        # so the negation on the next line can still take effect.
        write_file(project_root / ".gitignore", "generated/*\n!generated/keep.py\n")
        write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20)
        write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20)

        assert scanned_files(project_root) == ["generated/keep.py"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_does_not_reinclude_file_from_ignored_directory():
    """A negation cannot bring back a file whose parent directory is ignored."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        # "generated/" ignores the directory itself, so "!generated/keep.py"
        # is expected to have no effect.
        write_file(project_root / ".gitignore", "generated/\n!generated/keep.py\n")
        write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20)
        write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20)

        assert scanned_files(project_root) == []
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_can_disable_gitignore():
    """With ``respect_gitignore=False`` a git-ignored file is still scanned."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".gitignore", "data/\n")
        write_file(project_root / "data" / "stuff.csv", "a,b,c\n" * 20)

        assert scanned_files(project_root, respect_gitignore=False) == ["data/stuff.csv"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_can_include_ignored_directory():
    """Listing a git-ignored directory in ``include_ignored`` scans its files."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".gitignore", "docs/\n")
        write_file(project_root / "docs" / "guide.md", "# Guide\n" * 20)

        assert scanned_files(project_root, include_ignored=["docs"]) == ["docs/guide.md"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_can_include_specific_ignored_file():
    """``include_ignored`` can name one file inside an ignored directory."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".gitignore", "generated/\n")
        write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20)
        write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20)

        # Only the named file comes back; its sibling drop.py stays ignored.
        assert scanned_files(project_root, include_ignored=["generated/keep.py"]) == [
            "generated/keep.py"
        ]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_can_include_exact_file_without_known_extension():
    """An extensionless file named in ``include_ignored`` is returned."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        # NOTE(review): presumably the scanner normally filters by extension,
        # which this exact-path include is meant to bypass -- confirm in
        # mempalace.miner.
        write_file(project_root / ".gitignore", "README\n")
        write_file(project_root / "README", "hello\n" * 20)

        assert scanned_files(project_root, include_ignored=["README"]) == ["README"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_include_override_beats_skip_dirs():
    """``include_ignored=[".pytest_cache"]`` makes that directory's file scannable."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".pytest_cache" / "cache.py", "print('cache')\n" * 20)

        # gitignore handling is off, so the include override is the only
        # option in play here.
        assert scanned_files(
            project_root,
            respect_gitignore=False,
            include_ignored=[".pytest_cache"],
        ) == [".pytest_cache/cache.py"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_scan_project_skip_dirs_still_apply_without_override():
    """Without an override, ``.pytest_cache`` is skipped even with gitignore off."""
    tmpdir = tempfile.mkdtemp()
    try:
        project_root = Path(tmpdir).resolve()

        write_file(project_root / ".pytest_cache" / "cache.py", "print('cache')\n" * 20)
        write_file(project_root / "main.py", "print('main')\n" * 20)

        # Only main.py is expected back; the cache file must not appear.
        assert scanned_files(project_root, respect_gitignore=False) == ["main.py"]
    finally:
        shutil.rmtree(tmpdir)
|
||||
|
||||
Reference in New Issue
Block a user