fix: align cmd_compress dict keys with compression_stats() return values (#569)

* fix: align cmd_compress dict keys with compression_stats() return values

* test: align compress test mocks with actual compression_stats() keys

* fix: address review — add an assertion that `Total:` appears in the output, move the stats key test to test_dialect.py
This commit is contained in:
Arnold Wender
2026-04-12 01:16:31 +02:00
committed by GitHub
parent 9c4b7302cc
commit 89c0a58271
3 changed files with 30 additions and 12 deletions
+4 -4
View File
@@ -349,7 +349,7 @@ def cmd_compress(args):
stats = dialect.compression_stats(doc, compressed) stats = dialect.compression_stats(doc, compressed)
total_original += stats["original_chars"] total_original += stats["original_chars"]
total_compressed += stats["compressed_chars"] total_compressed += stats["summary_chars"]
compressed_entries.append((doc_id, compressed, meta, stats)) compressed_entries.append((doc_id, compressed, meta, stats))
@@ -359,7 +359,7 @@ def cmd_compress(args):
source = Path(meta.get("source_file", "?")).name source = Path(meta.get("source_file", "?")).name
print(f" [{wing_name}/{room_name}] {source}") print(f" [{wing_name}/{room_name}] {source}")
print( print(
f" {stats['original_tokens']}t -> {stats['compressed_tokens']}t ({stats['ratio']:.1f}x)" f" {stats['original_tokens_est']}t -> {stats['summary_tokens_est']}t ({stats['size_ratio']:.1f}x)"
) )
print(f" {compressed}") print(f" {compressed}")
print() print()
@@ -370,8 +370,8 @@ def cmd_compress(args):
comp_col = client.get_or_create_collection("mempalace_compressed") comp_col = client.get_or_create_collection("mempalace_compressed")
for doc_id, compressed, meta, stats in compressed_entries: for doc_id, compressed, meta, stats in compressed_entries:
comp_meta = dict(meta) comp_meta = dict(meta)
comp_meta["compression_ratio"] = round(stats["ratio"], 1) comp_meta["compression_ratio"] = round(stats["size_ratio"], 1)
comp_meta["original_tokens"] = stats["original_tokens"] comp_meta["original_tokens"] = stats["original_tokens_est"]
comp_col.upsert( comp_col.upsert(
ids=[doc_id], ids=[doc_id],
documents=[compressed], documents=[compressed],
+12 -8
View File
@@ -546,10 +546,11 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys):
mock_dialect.compress.return_value = "compressed" mock_dialect.compress.return_value = "compressed"
mock_dialect.compression_stats.return_value = { mock_dialect.compression_stats.return_value = {
"original_chars": 100, "original_chars": 100,
"compressed_chars": 30, "summary_chars": 30,
"original_tokens": 25, "original_tokens_est": 25,
"compressed_tokens": 8, "summary_tokens_est": 8,
"ratio": 3.3, "size_ratio": 3.3,
"note": "Estimates only.",
} }
mock_dialect_mod = _make_mock_dialect_module(mock_dialect) mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
@@ -564,6 +565,7 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys):
out = capsys.readouterr().out out = capsys.readouterr().out
assert "dry run" in out.lower() assert "dry run" in out.lower()
assert "Compressing" in out assert "Compressing" in out
assert "Total:" in out
@patch("mempalace.cli.MempalaceConfig") @patch("mempalace.cli.MempalaceConfig")
@@ -619,10 +621,11 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
mock_dialect.compress.return_value = "compressed" mock_dialect.compress.return_value = "compressed"
mock_dialect.compression_stats.return_value = { mock_dialect.compression_stats.return_value = {
"original_chars": 100, "original_chars": 100,
"compressed_chars": 30, "summary_chars": 30,
"original_tokens": 25, "original_tokens_est": 25,
"compressed_tokens": 8, "summary_tokens_est": 8,
"ratio": 3.3, "size_ratio": 3.3,
"note": "Estimates only.",
} }
mock_dialect_mod = _make_mock_dialect_module(mock_dialect) mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
@@ -636,6 +639,7 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
cmd_compress(args) cmd_compress(args)
out = capsys.readouterr().out out = capsys.readouterr().out
assert "Stored" in out assert "Stored" in out
assert "Total:" in out
mock_comp_col.upsert.assert_called_once() mock_comp_col.upsert.assert_called_once()
+14
View File
@@ -115,6 +115,20 @@ class TestCompressionStats:
def test_count_tokens(self): def test_count_tokens(self):
assert Dialect.count_tokens("hello world") == 2 assert Dialect.count_tokens("hello world") == 2
def test_compression_stats_keys(self):
    """Check that compression_stats() exposes exactly the expected keys."""
    expected = (
        "original_chars",
        "summary_chars",
        "original_tokens_est",
        "summary_tokens_est",
        "size_ratio",
        "note",
    )
    result = Dialect().compression_stats("hello world this is a test", "HW:test")
    # Exact-set comparison: a missing key and an unexpected extra key both fail.
    assert set(result.keys()) == set(expected)
class TestZettelEncoding: class TestZettelEncoding:
def test_encode_zettel(self): def test_encode_zettel(self):