fix: align cmd_compress dict keys with compression_stats() return values (#569)
* fix: align cmd_compress dict keys with compression_stats() return values
* test: align compress test mocks with actual compression_stats() keys
* fix: address review — add "Total:" assertion, move stats key test to test_dialect.py
This commit is contained in:
+4
-4
@@ -349,7 +349,7 @@ def cmd_compress(args):
|
|||||||
stats = dialect.compression_stats(doc, compressed)
|
stats = dialect.compression_stats(doc, compressed)
|
||||||
|
|
||||||
total_original += stats["original_chars"]
|
total_original += stats["original_chars"]
|
||||||
total_compressed += stats["compressed_chars"]
|
total_compressed += stats["summary_chars"]
|
||||||
|
|
||||||
compressed_entries.append((doc_id, compressed, meta, stats))
|
compressed_entries.append((doc_id, compressed, meta, stats))
|
||||||
|
|
||||||
@@ -359,7 +359,7 @@ def cmd_compress(args):
|
|||||||
source = Path(meta.get("source_file", "?")).name
|
source = Path(meta.get("source_file", "?")).name
|
||||||
print(f" [{wing_name}/{room_name}] {source}")
|
print(f" [{wing_name}/{room_name}] {source}")
|
||||||
print(
|
print(
|
||||||
f" {stats['original_tokens']}t -> {stats['compressed_tokens']}t ({stats['ratio']:.1f}x)"
|
f" {stats['original_tokens_est']}t -> {stats['summary_tokens_est']}t ({stats['size_ratio']:.1f}x)"
|
||||||
)
|
)
|
||||||
print(f" {compressed}")
|
print(f" {compressed}")
|
||||||
print()
|
print()
|
||||||
@@ -370,8 +370,8 @@ def cmd_compress(args):
|
|||||||
comp_col = client.get_or_create_collection("mempalace_compressed")
|
comp_col = client.get_or_create_collection("mempalace_compressed")
|
||||||
for doc_id, compressed, meta, stats in compressed_entries:
|
for doc_id, compressed, meta, stats in compressed_entries:
|
||||||
comp_meta = dict(meta)
|
comp_meta = dict(meta)
|
||||||
comp_meta["compression_ratio"] = round(stats["ratio"], 1)
|
comp_meta["compression_ratio"] = round(stats["size_ratio"], 1)
|
||||||
comp_meta["original_tokens"] = stats["original_tokens"]
|
comp_meta["original_tokens"] = stats["original_tokens_est"]
|
||||||
comp_col.upsert(
|
comp_col.upsert(
|
||||||
ids=[doc_id],
|
ids=[doc_id],
|
||||||
documents=[compressed],
|
documents=[compressed],
|
||||||
|
|||||||
+12
-8
@@ -546,10 +546,11 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys):
|
|||||||
mock_dialect.compress.return_value = "compressed"
|
mock_dialect.compress.return_value = "compressed"
|
||||||
mock_dialect.compression_stats.return_value = {
|
mock_dialect.compression_stats.return_value = {
|
||||||
"original_chars": 100,
|
"original_chars": 100,
|
||||||
"compressed_chars": 30,
|
"summary_chars": 30,
|
||||||
"original_tokens": 25,
|
"original_tokens_est": 25,
|
||||||
"compressed_tokens": 8,
|
"summary_tokens_est": 8,
|
||||||
"ratio": 3.3,
|
"size_ratio": 3.3,
|
||||||
|
"note": "Estimates only.",
|
||||||
}
|
}
|
||||||
mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
|
mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
|
||||||
|
|
||||||
@@ -564,6 +565,7 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys):
|
|||||||
out = capsys.readouterr().out
|
out = capsys.readouterr().out
|
||||||
assert "dry run" in out.lower()
|
assert "dry run" in out.lower()
|
||||||
assert "Compressing" in out
|
assert "Compressing" in out
|
||||||
|
assert "Total:" in out
|
||||||
|
|
||||||
|
|
||||||
@patch("mempalace.cli.MempalaceConfig")
|
@patch("mempalace.cli.MempalaceConfig")
|
||||||
@@ -619,10 +621,11 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
|
|||||||
mock_dialect.compress.return_value = "compressed"
|
mock_dialect.compress.return_value = "compressed"
|
||||||
mock_dialect.compression_stats.return_value = {
|
mock_dialect.compression_stats.return_value = {
|
||||||
"original_chars": 100,
|
"original_chars": 100,
|
||||||
"compressed_chars": 30,
|
"summary_chars": 30,
|
||||||
"original_tokens": 25,
|
"original_tokens_est": 25,
|
||||||
"compressed_tokens": 8,
|
"summary_tokens_est": 8,
|
||||||
"ratio": 3.3,
|
"size_ratio": 3.3,
|
||||||
|
"note": "Estimates only.",
|
||||||
}
|
}
|
||||||
mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
|
mock_dialect_mod = _make_mock_dialect_module(mock_dialect)
|
||||||
|
|
||||||
@@ -636,6 +639,7 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys):
|
|||||||
cmd_compress(args)
|
cmd_compress(args)
|
||||||
out = capsys.readouterr().out
|
out = capsys.readouterr().out
|
||||||
assert "Stored" in out
|
assert "Stored" in out
|
||||||
|
assert "Total:" in out
|
||||||
mock_comp_col.upsert.assert_called_once()
|
mock_comp_col.upsert.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -115,6 +115,20 @@ class TestCompressionStats:
|
|||||||
def test_count_tokens(self):
|
def test_count_tokens(self):
|
||||||
assert Dialect.count_tokens("hello world") == 2
|
assert Dialect.count_tokens("hello world") == 2
|
||||||
|
|
||||||
|
def test_compression_stats_keys(self):
|
||||||
|
"""Verify compression_stats() returns the expected key set."""
|
||||||
|
d = Dialect()
|
||||||
|
stats = d.compression_stats("hello world this is a test", "HW:test")
|
||||||
|
expected_keys = {
|
||||||
|
"original_chars",
|
||||||
|
"summary_chars",
|
||||||
|
"original_tokens_est",
|
||||||
|
"summary_tokens_est",
|
||||||
|
"size_ratio",
|
||||||
|
"note",
|
||||||
|
}
|
||||||
|
assert set(stats.keys()) == expected_keys
|
||||||
|
|
||||||
|
|
||||||
class TestZettelEncoding:
|
class TestZettelEncoding:
|
||||||
def test_encode_zettel(self):
|
def test_encode_zettel(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user