Merge pull request #78 from ac-opensource/feature/respect-gitignore-mining
Respect nested .gitignore rules when mining project files
This commit is contained in:
@@ -65,6 +65,9 @@ def cmd_init(args):
|
|||||||
|
|
||||||
def cmd_mine(args):
|
def cmd_mine(args):
|
||||||
palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
|
palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
|
||||||
|
include_ignored = []
|
||||||
|
for raw in args.include_ignored or []:
|
||||||
|
include_ignored.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||||
|
|
||||||
if args.mode == "convos":
|
if args.mode == "convos":
|
||||||
from .convo_miner import mine_convos
|
from .convo_miner import mine_convos
|
||||||
@@ -88,6 +91,8 @@ def cmd_mine(args):
|
|||||||
agent=args.agent,
|
agent=args.agent,
|
||||||
limit=args.limit,
|
limit=args.limit,
|
||||||
dry_run=args.dry_run,
|
dry_run=args.dry_run,
|
||||||
|
respect_gitignore=not args.no_gitignore,
|
||||||
|
include_ignored=include_ignored,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -359,6 +364,17 @@ def main():
|
|||||||
help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports",
|
help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports",
|
||||||
)
|
)
|
||||||
p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)")
|
p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)")
|
||||||
|
p_mine.add_argument(
|
||||||
|
"--no-gitignore",
|
||||||
|
action="store_true",
|
||||||
|
help="Don't respect .gitignore files when scanning project files",
|
||||||
|
)
|
||||||
|
p_mine.add_argument(
|
||||||
|
"--include-ignored",
|
||||||
|
action="append",
|
||||||
|
default=[],
|
||||||
|
help="Always scan these project-relative paths even if ignored; repeat or pass comma-separated paths",
|
||||||
|
)
|
||||||
p_mine.add_argument(
|
p_mine.add_argument(
|
||||||
"--agent",
|
"--agent",
|
||||||
default="mempalace",
|
default="mempalace",
|
||||||
|
|||||||
+269
-15
@@ -10,6 +10,7 @@ Stores verbatim chunks as drawers. No summaries. Ever.
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import fnmatch
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -51,6 +52,27 @@ SKIP_DIRS = {
|
|||||||
".next",
|
".next",
|
||||||
"coverage",
|
"coverage",
|
||||||
".mempalace",
|
".mempalace",
|
||||||
|
".ruff_cache",
|
||||||
|
".mypy_cache",
|
||||||
|
".pytest_cache",
|
||||||
|
".cache",
|
||||||
|
".tox",
|
||||||
|
".nox",
|
||||||
|
".idea",
|
||||||
|
".vscode",
|
||||||
|
".ipynb_checkpoints",
|
||||||
|
".eggs",
|
||||||
|
"htmlcov",
|
||||||
|
"target",
|
||||||
|
}
|
||||||
|
|
||||||
|
SKIP_FILENAMES = {
|
||||||
|
"mempalace.yaml",
|
||||||
|
"mempalace.yml",
|
||||||
|
"mempal.yaml",
|
||||||
|
"mempal.yml",
|
||||||
|
".gitignore",
|
||||||
|
"package-lock.json",
|
||||||
}
|
}
|
||||||
|
|
||||||
CHUNK_SIZE = 800 # chars per drawer
|
CHUNK_SIZE = 800 # chars per drawer
|
||||||
@@ -58,6 +80,196 @@ CHUNK_OVERLAP = 100 # overlap between chunks
|
|||||||
MIN_CHUNK_SIZE = 50 # skip tiny chunks
|
MIN_CHUNK_SIZE = 50 # skip tiny chunks
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# IGNORE MATCHING
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class GitignoreMatcher:
    """Lightweight matcher for one directory's .gitignore patterns.

    Each rule is a dict with the keys ``pattern`` (the glob text with the
    leading ``!``, leading ``/`` and trailing ``/`` removed), ``anchored``
    (the line started with ``/``), ``dir_only`` (the line ended with ``/``)
    and ``negated`` (the line started with an unescaped ``!``).
    """

    def __init__(self, base_dir: Path, rules: list):
        # Directory that owns the .gitignore; patterns are matched against
        # paths relative to it.
        self.base_dir = base_dir
        # Parsed rules in file order; later rules override earlier ones.
        self.rules = rules

    @classmethod
    def from_dir(cls, dir_path: Path):
        """Build a matcher from ``dir_path/.gitignore``.

        Returns None when the file is missing, cannot be read, or contains
        no usable rules.
        """
        gitignore_path = dir_path / ".gitignore"
        if not gitignore_path.is_file():
            return None

        try:
            lines = gitignore_path.read_text(encoding="utf-8", errors="replace").splitlines()
        except OSError:
            # Best effort: an unreadable .gitignore is treated as absent.
            return None

        rules = []
        for raw_line in lines:
            line = raw_line.strip()
            if not line:
                continue

            # "\#" and "\!" escape a literal leading "#" / "!". Remember that
            # the first character was escaped so that "\!name" is NOT parsed
            # as a negation below.
            escaped = line.startswith(("\\#", "\\!"))
            if escaped:
                line = line[1:]
            elif line.startswith("#"):
                continue

            negated = not escaped and line.startswith("!")
            if negated:
                line = line[1:]

            anchored = line.startswith("/")
            if anchored:
                line = line.lstrip("/")

            dir_only = line.endswith("/")
            if dir_only:
                line = line.rstrip("/")

            if not line:
                continue

            rules.append(
                {
                    "pattern": line,
                    "anchored": anchored,
                    "dir_only": dir_only,
                    "negated": negated,
                }
            )

        if not rules:
            return None

        return cls(dir_path, rules)

    def matches(self, path: Path, is_dir: bool = None):
        """Evaluate ``path`` against this matcher's rules.

        Returns True when the last matching rule ignores the path, False when
        the last matching rule is a negation, and None when no rule matches,
        when ``path`` is not under ``base_dir``, or when ``path`` is
        ``base_dir`` itself. When ``is_dir`` is None the filesystem is
        consulted via ``path.is_dir()``.
        """
        try:
            relative = path.relative_to(self.base_dir).as_posix().strip("/")
        except ValueError:
            return None

        # Path.relative_to() yields "." for the base directory itself; a
        # .gitignore never applies to the directory that contains it.
        if not relative or relative == ".":
            return None

        if is_dir is None:
            is_dir = path.is_dir()

        ignored = None
        for rule in self.rules:
            if self._rule_matches(rule, relative, is_dir):
                # Keep iterating: the last matching rule wins.
                ignored = not rule["negated"]
        return ignored

    def _rule_matches(self, rule: dict, relative: str, is_dir: bool) -> bool:
        """Return True when ``rule`` applies to the POSIX path ``relative``."""
        pattern = rule["pattern"]
        parts = relative.split("/")
        pattern_parts = pattern.split("/")
        # Anchored patterns and patterns containing "/" are matched from the
        # base directory; bare names may match any path component.
        rooted = rule["anchored"] or len(pattern_parts) > 1

        if rule["dir_only"]:
            # A directory-only rule can only match directory components, so
            # drop the final component when the path is a file.
            target_parts = parts if is_dir else parts[:-1]
            if not target_parts:
                return False
            if rooted:
                return self._match_from_root(target_parts, pattern_parts)
            return any(fnmatch.fnmatch(part, pattern) for part in target_parts)

        if rooted:
            return self._match_from_root(parts, pattern_parts)

        return any(fnmatch.fnmatch(part, pattern) for part in parts)

    def _match_from_root(self, target_parts: list, pattern_parts: list) -> bool:
        """Match pattern components against a prefix of ``target_parts``.

        ``**`` consumes zero or more path components. The match succeeds as
        soon as the pattern is exhausted, so anything beneath a matched
        directory matches as well.
        """

        def _walk(path_index: int, pattern_index: int) -> bool:
            if pattern_index == len(pattern_parts):
                return True

            if path_index == len(target_parts):
                # Path exhausted: only trailing "**" components may remain.
                return all(part == "**" for part in pattern_parts[pattern_index:])

            pattern_part = pattern_parts[pattern_index]
            if pattern_part == "**":
                # Either "**" matches nothing, or it swallows one component.
                return _walk(path_index, pattern_index + 1) or _walk(
                    path_index + 1, pattern_index
                )

            if not fnmatch.fnmatch(target_parts[path_index], pattern_part):
                return False

            return _walk(path_index + 1, pattern_index + 1)

        return _walk(0, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def load_gitignore_matcher(dir_path: Path, cache: dict):
    """Return the .gitignore matcher for ``dir_path``, memoized in ``cache``.

    The result of ``GitignoreMatcher.from_dir`` is stored under ``dir_path``
    on first use (including a None result), so each directory is parsed at
    most once per cache.
    """
    try:
        return cache[dir_path]
    except KeyError:
        matcher = GitignoreMatcher.from_dir(dir_path)
        cache[dir_path] = matcher
        return matcher
|
||||||
|
|
||||||
|
|
||||||
|
def is_gitignored(path: Path, matchers: list, is_dir: bool = False) -> bool:
    """Combine the verdicts of every matcher, in order; the last one wins.

    Each matcher's ``matches`` is consulted in the given order. A None
    verdict means "no opinion" and is skipped; the final non-None verdict is
    returned, or False when no matcher had an opinion.
    """
    verdicts = [matcher.matches(path, is_dir=is_dir) for matcher in matchers]
    decided = [verdict for verdict in verdicts if verdict is not None]
    if decided:
        return decided[-1]
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_dir(dirname: str) -> bool:
    """Tell whether a directory name is always skipped, regardless of .gitignore.

    True for any name listed in SKIP_DIRS and for any name ending in
    ".egg-info".
    """
    if dirname.endswith(".egg-info"):
        return True
    return dirname in SKIP_DIRS
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_include_paths(include_ignored: list) -> set:
    """Turn raw include entries into a set of project-relative POSIX strings.

    Every entry is stringified, stripped of surrounding whitespace and of
    leading/trailing slashes, then passed through ``Path(...).as_posix()``.
    Entries that are empty after stripping are dropped. None or an empty
    list yields an empty set.
    """
    stripped = (str(entry).strip().strip("/") for entry in include_ignored or [])
    return {Path(item).as_posix() for item in stripped if item}
|
||||||
|
|
||||||
|
|
||||||
|
def is_exact_force_include(path: Path, project_path: Path, include_paths: set) -> bool:
    """Check whether ``path`` itself is listed as an include override.

    The path is made relative to ``project_path`` and compared, as a POSIX
    string, against ``include_paths``. Returns False when there are no
    overrides or when ``path`` is not located under ``project_path``.
    """
    if include_paths:
        try:
            inside_project = path.relative_to(project_path)
        except ValueError:
            return False
        return inside_project.as_posix().strip("/") in include_paths
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_force_included(path: Path, project_path: Path, include_paths: set) -> bool:
    """Check whether ``path`` is related to any explicit include override.

    A path qualifies when, relative to ``project_path``, it equals an include
    path, lies beneath one, or is an ancestor directory of one (so the walk
    can still descend to reach the included entry). Returns False when there
    are no overrides or ``path`` is outside ``project_path``.
    """
    if not include_paths:
        return False

    try:
        inside_project = path.relative_to(project_path)
    except ValueError:
        return False

    relative = inside_project.as_posix().strip("/")
    if not relative:
        return False

    def _related(include_path: str) -> bool:
        return (
            relative == include_path
            or relative.startswith(f"{include_path}/")
            or include_path.startswith(f"{relative}/")
        )

    return any(_related(include_path) for include_path in include_paths)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CONFIG
|
# CONFIG
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -284,26 +496,58 @@ def process_file(
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
def scan_project(project_dir: str) -> list:
|
def scan_project(
|
||||||
|
project_dir: str,
|
||||||
|
respect_gitignore: bool = True,
|
||||||
|
include_ignored: list = None,
|
||||||
|
) -> list:
|
||||||
"""Return list of all readable file paths."""
|
"""Return list of all readable file paths."""
|
||||||
project_path = Path(project_dir).expanduser().resolve()
|
project_path = Path(project_dir).expanduser().resolve()
|
||||||
files = []
|
files = []
|
||||||
|
active_matchers = []
|
||||||
|
matcher_cache = {}
|
||||||
|
include_paths = normalize_include_paths(include_ignored)
|
||||||
|
|
||||||
for root, dirs, filenames in os.walk(project_path):
|
for root, dirs, filenames in os.walk(project_path):
|
||||||
dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
|
root_path = Path(root)
|
||||||
|
|
||||||
|
if respect_gitignore:
|
||||||
|
active_matchers = [
|
||||||
|
matcher
|
||||||
|
for matcher in active_matchers
|
||||||
|
if root_path == matcher.base_dir or matcher.base_dir in root_path.parents
|
||||||
|
]
|
||||||
|
current_matcher = load_gitignore_matcher(root_path, matcher_cache)
|
||||||
|
if current_matcher is not None:
|
||||||
|
active_matchers.append(current_matcher)
|
||||||
|
|
||||||
|
dirs[:] = [
|
||||||
|
d
|
||||||
|
for d in dirs
|
||||||
|
if is_force_included(root_path / d, project_path, include_paths)
|
||||||
|
or not should_skip_dir(d)
|
||||||
|
]
|
||||||
|
if respect_gitignore and active_matchers:
|
||||||
|
dirs[:] = [
|
||||||
|
d
|
||||||
|
for d in dirs
|
||||||
|
if is_force_included(root_path / d, project_path, include_paths)
|
||||||
|
or not is_gitignored(root_path / d, active_matchers, is_dir=True)
|
||||||
|
]
|
||||||
|
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
filepath = Path(root) / filename
|
filepath = root_path / filename
|
||||||
if filepath.suffix.lower() in READABLE_EXTENSIONS:
|
force_include = is_force_included(filepath, project_path, include_paths)
|
||||||
# Skip config files
|
exact_force_include = is_exact_force_include(filepath, project_path, include_paths)
|
||||||
if filename in (
|
|
||||||
"mempalace.yaml",
|
if not force_include and filename in SKIP_FILENAMES:
|
||||||
"mempalace.yml",
|
continue
|
||||||
"mempal.yaml",
|
if filepath.suffix.lower() not in READABLE_EXTENSIONS and not exact_force_include:
|
||||||
"mempal.yml",
|
continue
|
||||||
".gitignore",
|
if respect_gitignore and active_matchers and not force_include:
|
||||||
"package-lock.json",
|
if is_gitignored(filepath, active_matchers, is_dir=False):
|
||||||
):
|
|
||||||
continue
|
continue
|
||||||
files.append(filepath)
|
files.append(filepath)
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
|
||||||
@@ -319,6 +563,8 @@ def mine(
|
|||||||
agent: str = "mempalace",
|
agent: str = "mempalace",
|
||||||
limit: int = 0,
|
limit: int = 0,
|
||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
|
respect_gitignore: bool = True,
|
||||||
|
include_ignored: list = None,
|
||||||
):
|
):
|
||||||
"""Mine a project directory into the palace."""
|
"""Mine a project directory into the palace."""
|
||||||
|
|
||||||
@@ -328,7 +574,11 @@ def mine(
|
|||||||
wing = wing_override or config["wing"]
|
wing = wing_override or config["wing"]
|
||||||
rooms = config.get("rooms", [{"name": "general", "description": "All project files"}])
|
rooms = config.get("rooms", [{"name": "general", "description": "All project files"}])
|
||||||
|
|
||||||
files = scan_project(project_dir)
|
files = scan_project(
|
||||||
|
project_dir,
|
||||||
|
respect_gitignore=respect_gitignore,
|
||||||
|
include_ignored=include_ignored,
|
||||||
|
)
|
||||||
if limit > 0:
|
if limit > 0:
|
||||||
files = files[:limit]
|
files = files[:limit]
|
||||||
|
|
||||||
@@ -341,6 +591,10 @@ def mine(
|
|||||||
print(f" Palace: {palace_path}")
|
print(f" Palace: {palace_path}")
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(" DRY RUN — nothing will be filed")
|
print(" DRY RUN — nothing will be filed")
|
||||||
|
if not respect_gitignore:
|
||||||
|
print(" .gitignore: DISABLED")
|
||||||
|
if include_ignored:
|
||||||
|
print(f" Include: {', '.join(sorted(normalize_include_paths(include_ignored)))}")
|
||||||
print(f"{'─' * 55}\n")
|
print(f"{'─' * 55}\n")
|
||||||
|
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
|
|||||||
+197
-25
@@ -1,36 +1,208 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
|
||||||
import shutil
|
import shutil
|
||||||
import yaml
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
from mempalace.miner import mine
|
import yaml
|
||||||
|
|
||||||
|
from mempalace.miner import mine, scan_project
|
||||||
|
|
||||||
|
|
||||||
|
def write_file(path: Path, content: str):
    """Write ``content`` to ``path`` as UTF-8, creating parent directories first."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def scanned_files(project_root: Path, **kwargs):
    """Run scan_project on ``project_root`` and return sorted project-relative POSIX paths.

    Extra keyword arguments are forwarded to scan_project unchanged.
    """
    relative_paths = [
        found.relative_to(project_root).as_posix()
        for found in scan_project(str(project_root), **kwargs)
    ]
    relative_paths.sort()
    return relative_paths
|
||||||
|
|
||||||
|
|
||||||
def test_project_mining():
|
def test_project_mining():
|
||||||
tmpdir = tempfile.mkdtemp()
|
tmpdir = tempfile.mkdtemp()
|
||||||
# Create a mini project
|
try:
|
||||||
os.makedirs(os.path.join(tmpdir, "backend"))
|
project_root = Path(tmpdir).resolve()
|
||||||
with open(os.path.join(tmpdir, "backend", "app.py"), "w") as f:
|
os.makedirs(project_root / "backend")
|
||||||
f.write("def main():\n print('hello world')\n" * 20)
|
|
||||||
# Create config
|
write_file(
|
||||||
with open(os.path.join(tmpdir, "mempalace.yaml"), "w") as f:
|
project_root / "backend" / "app.py", "def main():\n print('hello world')\n" * 20
|
||||||
yaml.dump(
|
|
||||||
{
|
|
||||||
"wing": "test_project",
|
|
||||||
"rooms": [
|
|
||||||
{"name": "backend", "description": "Backend code"},
|
|
||||||
{"name": "general", "description": "General"},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
f,
|
|
||||||
)
|
)
|
||||||
|
with open(project_root / "mempalace.yaml", "w") as f:
|
||||||
|
yaml.dump(
|
||||||
|
{
|
||||||
|
"wing": "test_project",
|
||||||
|
"rooms": [
|
||||||
|
{"name": "backend", "description": "Backend code"},
|
||||||
|
{"name": "general", "description": "General"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
f,
|
||||||
|
)
|
||||||
|
|
||||||
palace_path = os.path.join(tmpdir, "palace")
|
palace_path = project_root / "palace"
|
||||||
mine(tmpdir, palace_path)
|
mine(str(project_root), str(palace_path))
|
||||||
|
|
||||||
# Verify
|
client = chromadb.PersistentClient(path=str(palace_path))
|
||||||
client = chromadb.PersistentClient(path=palace_path)
|
col = client.get_collection("mempalace_drawers")
|
||||||
col = client.get_collection("mempalace_drawers")
|
assert col.count() > 0
|
||||||
assert col.count() > 0
|
finally:
|
||||||
|
shutil.rmtree(tmpdir)
|
||||||
|
|
||||||
shutil.rmtree(tmpdir)
|
|
||||||
|
def test_scan_project_respects_gitignore():
    """A root .gitignore hides both a listed file and a listed directory."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        fixtures = {
            ".gitignore": "ignored.py\ngenerated/\n",
            "src/app.py": "print('hello')\n" * 20,
            "ignored.py": "print('ignore me')\n" * 20,
            "generated/artifact.py": "print('artifact')\n" * 20,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        assert scanned_files(project_root) == ["src/app.py"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_respects_nested_gitignore():
    """Rules from a nested .gitignore apply together with the root rules."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        fixtures = {
            ".gitignore": "*.log\n",
            "subrepo/.gitignore": "tasks/\n",
            "subrepo/src/main.py": "print('main')\n" * 20,
            "subrepo/tasks/task.py": "print('task')\n" * 20,
            "subrepo/debug.log": "debug\n" * 20,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        assert scanned_files(project_root) == ["subrepo/src/main.py"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_allows_nested_gitignore_override():
    """A negation in a nested .gitignore re-includes a file the root ignores."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        csv_body = "a,b,c\n" * 20
        fixtures = {
            ".gitignore": "*.csv\n",
            "subrepo/.gitignore": "!keep.csv\n",
            "drop.csv": csv_body,
            "subrepo/keep.csv": csv_body,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        assert scanned_files(project_root) == ["subrepo/keep.csv"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_allows_gitignore_negation_when_parent_dir_is_visible():
    """Ignoring a directory's contents (not the directory) lets a negation keep one file."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        fixtures = {
            ".gitignore": "generated/*\n!generated/keep.py\n",
            "generated/drop.py": "print('drop')\n" * 20,
            "generated/keep.py": "print('keep')\n" * 20,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        assert scanned_files(project_root) == ["generated/keep.py"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_does_not_reinclude_file_from_ignored_directory():
    """A negation cannot rescue a file whose parent directory is itself ignored."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        fixtures = {
            ".gitignore": "generated/\n!generated/keep.py\n",
            "generated/drop.py": "print('drop')\n" * 20,
            "generated/keep.py": "print('keep')\n" * 20,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        assert scanned_files(project_root) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_can_disable_gitignore():
    """With respect_gitignore=False, ignored paths are scanned anyway."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        write_file(project_root / ".gitignore", "data/\n")
        write_file(project_root / "data/stuff.csv", "a,b,c\n" * 20)

        found = scanned_files(project_root, respect_gitignore=False)
        assert found == ["data/stuff.csv"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_can_include_ignored_directory():
    """An include override on a directory brings back its ignored contents."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        write_file(project_root / ".gitignore", "docs/\n")
        write_file(project_root / "docs/guide.md", "# Guide\n" * 20)

        found = scanned_files(project_root, include_ignored=["docs"])
        assert found == ["docs/guide.md"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_can_include_specific_ignored_file():
    """An include override on one file keeps its ignored siblings excluded."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        fixtures = {
            ".gitignore": "generated/\n",
            "generated/drop.py": "print('drop')\n" * 20,
            "generated/keep.py": "print('keep')\n" * 20,
        }
        for relative_path, content in fixtures.items():
            write_file(project_root / relative_path, content)

        found = scanned_files(project_root, include_ignored=["generated/keep.py"])
        assert found == ["generated/keep.py"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_can_include_exact_file_without_known_extension():
    """An exact include override admits a file even without a readable extension."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        write_file(project_root / ".gitignore", "README\n")
        write_file(project_root / "README", "hello\n" * 20)

        found = scanned_files(project_root, include_ignored=["README"])
        assert found == ["README"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_include_override_beats_skip_dirs():
    """An include override also wins over the built-in skipped directory names."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        write_file(project_root / ".pytest_cache/cache.py", "print('cache')\n" * 20)

        found = scanned_files(
            project_root,
            respect_gitignore=False,
            include_ignored=[".pytest_cache"],
        )
        assert found == [".pytest_cache/cache.py"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_project_skip_dirs_still_apply_without_override():
    """Built-in skipped directories stay skipped even when .gitignore is disabled."""
    with tempfile.TemporaryDirectory() as tmpdir:
        project_root = Path(tmpdir).resolve()
        write_file(project_root / ".pytest_cache/cache.py", "print('cache')\n" * 20)
        write_file(project_root / "main.py", "print('main')\n" * 20)

        found = scanned_files(project_root, respect_gitignore=False)
        assert found == ["main.py"]
|
||||||
|
|||||||
Reference in New Issue
Block a user