diff --git a/mempalace/cli.py b/mempalace/cli.py index 1d106ca..d6b1e0a 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -349,7 +349,7 @@ def cmd_compress(args): stats = dialect.compression_stats(doc, compressed) total_original += stats["original_chars"] - total_compressed += stats["compressed_chars"] + total_compressed += stats["summary_chars"] compressed_entries.append((doc_id, compressed, meta, stats)) @@ -359,7 +359,7 @@ def cmd_compress(args): source = Path(meta.get("source_file", "?")).name print(f" [{wing_name}/{room_name}] {source}") print( - f" {stats['original_tokens']}t -> {stats['compressed_tokens']}t ({stats['ratio']:.1f}x)" + f" {stats['original_tokens_est']}t -> {stats['summary_tokens_est']}t ({stats['size_ratio']:.1f}x)" ) print(f" {compressed}") print() @@ -370,8 +370,8 @@ def cmd_compress(args): comp_col = client.get_or_create_collection("mempalace_compressed") for doc_id, compressed, meta, stats in compressed_entries: comp_meta = dict(meta) - comp_meta["compression_ratio"] = round(stats["ratio"], 1) - comp_meta["original_tokens"] = stats["original_tokens"] + comp_meta["compression_ratio"] = round(stats["size_ratio"], 1) + comp_meta["original_tokens"] = stats["original_tokens_est"] comp_col.upsert( ids=[doc_id], documents=[compressed], diff --git a/tests/test_cli.py b/tests/test_cli.py index e3c68f9..dc55f23 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -546,10 +546,11 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys): mock_dialect.compress.return_value = "compressed" mock_dialect.compression_stats.return_value = { "original_chars": 100, - "compressed_chars": 30, - "original_tokens": 25, - "compressed_tokens": 8, - "ratio": 3.3, + "summary_chars": 30, + "original_tokens_est": 25, + "summary_tokens_est": 8, + "size_ratio": 3.3, + "note": "Estimates only.", } mock_dialect_mod = _make_mock_dialect_module(mock_dialect) @@ -564,6 +565,7 @@ def test_cmd_compress_dry_run(mock_config_cls, capsys): out = capsys.readouterr().out assert "dry run" in out.lower() assert "Compressing" in out + assert "Total:" in out @patch("mempalace.cli.MempalaceConfig") @@ -619,10 +621,11 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): mock_dialect.compress.return_value = "compressed" mock_dialect.compression_stats.return_value = { "original_chars": 100, - "compressed_chars": 30, - "original_tokens": 25, - "compressed_tokens": 8, - "ratio": 3.3, + "summary_chars": 30, + "original_tokens_est": 25, + "summary_tokens_est": 8, + "size_ratio": 3.3, + "note": "Estimates only.", } mock_dialect_mod = _make_mock_dialect_module(mock_dialect) @@ -636,6 +639,7 @@ def test_cmd_compress_stores_results(mock_config_cls, capsys): cmd_compress(args) out = capsys.readouterr().out assert "Stored" in out + assert "Total:" in out mock_comp_col.upsert.assert_called_once() diff --git a/tests/test_dialect.py b/tests/test_dialect.py index 8edc7ec..057445a 100644 --- a/tests/test_dialect.py +++ b/tests/test_dialect.py @@ -115,6 +115,20 @@ class TestCompressionStats: def test_count_tokens(self): assert Dialect.count_tokens("hello world") == 2 + def test_compression_stats_keys(self): + """Verify compression_stats() returns the expected key set.""" + d = Dialect() + stats = d.compression_stats("hello world this is a test", "HW:test") + expected_keys = { + "original_chars", + "summary_chars", + "original_tokens_est", + "summary_tokens_est", + "size_ratio", + "note", + } + assert set(stats.keys()) == expected_keys + class TestZettelEncoding: def test_encode_zettel(self):