mirror of
https://github.com/HKUDS/CLI-Anything.git
synced 2026-05-06 14:11:57 +08:00
fix(mubu): harden document resolution and state branding
Deduplicate logical document paths by freshest metadata so open-path resolves real Mubu history duplicates consistently. Overlay backup-driven show/link output with metadata titles and doc paths, and default new public state to ~/.config/mubu-cli while preserving legacy fallback.
This commit is contained in:
@@ -16,6 +16,8 @@ from cli_anything.mubu.utils import ReplSkin
|
||||
|
||||
CONTEXT_SETTINGS = {"ignore_unknown_options": True, "allow_extra_args": True}
|
||||
COMMAND_HISTORY_LIMIT = 50
|
||||
PUBLIC_PROGRAM_NAME = "mubu-cli"
|
||||
COMPAT_PROGRAM_NAME = "cli-anything-mubu"
|
||||
DISCOVER_COMMANDS = {
|
||||
"docs": "List latest known document snapshots from local backups.",
|
||||
"folders": "List folder metadata from local RxDB storage.",
|
||||
@@ -44,7 +46,7 @@ LEGACY_COMMANDS.update(DISCOVER_COMMANDS)
|
||||
LEGACY_COMMANDS.update(INSPECT_COMMANDS)
|
||||
LEGACY_COMMANDS.update(MUTATE_COMMANDS)
|
||||
|
||||
REPL_HELP = """Interactive REPL for cli-anything-mubu
|
||||
REPL_HELP_TEMPLATE = """Interactive REPL for {program_name}
|
||||
|
||||
Builtins:
|
||||
help Show this REPL help
|
||||
@@ -72,13 +74,32 @@ Examples:
|
||||
|
||||
If you prefer no-argument daily helpers, set MUBU_DAILY_FOLDER='<daily-folder-ref>'.
|
||||
"""
|
||||
REPL_COMMAND_HELP = REPL_HELP_TEMPLATE.format(program_name="the Mubu CLI")
|
||||
|
||||
|
||||
def normalize_program_name(program_name: str | None) -> str:
    """Map an invocation name onto one of the two supported program brands.

    Only an exact match on the public brand's basename is preserved;
    anything else (None, empty string, full paths, unknown names) falls
    back to the legacy/compat brand.
    """
    basename = Path(program_name or "").name.strip()
    return PUBLIC_PROGRAM_NAME if basename == PUBLIC_PROGRAM_NAME else COMPAT_PROGRAM_NAME
|
||||
|
||||
|
||||
def repl_help_text(program_name: str | None = None) -> str:
    """Render the REPL help template branded for *program_name*."""
    branded = normalize_program_name(program_name)
    return REPL_HELP_TEMPLATE.format(program_name=branded)
|
||||
|
||||
|
||||
def session_state_dir() -> Path:
    """Resolve the directory that holds persisted session/REPL state.

    Resolution order:
      1. ``CLI_ANYTHING_MUBU_STATE_DIR`` environment override (expanded),
         when set to a non-empty value.
      2. ``~/.config/mubu-cli`` when it already exists (public brand).
      3. ``~/.config/cli-anything-mubu`` when only the legacy dir exists.
      4. ``~/.config/mubu-cli`` as the default for fresh installs.

    Bug fixed: the interleaved source contained an unconditional legacy
    ``return`` ahead of this resolution logic, making the public-brand
    default and legacy fallback unreachable.
    """
    override = os.environ.get("CLI_ANYTHING_MUBU_STATE_DIR", "").strip()
    if override:
        return Path(override).expanduser()
    config_root = Path.home() / ".config"
    public_dir = config_root / PUBLIC_PROGRAM_NAME
    legacy_dir = config_root / COMPAT_PROGRAM_NAME
    if public_dir.exists():
        return public_dir
    if legacy_dir.exists():
        return legacy_dir
    # Neither exists yet: new state is created under the public brand.
    return public_dir
|
||||
|
||||
|
||||
def session_state_path() -> Path:
|
||||
@@ -253,14 +274,23 @@ def invoke_probe_command(ctx: click.Context | None, command_name: str, probe_arg
|
||||
return int(result or 0)
|
||||
|
||||
|
||||
def print_repl_banner(skin: ReplSkin, program_name: str | None = None) -> None:
    """Print the REPL start-up banner, branded per the invoking program name.

    NOTE(review): reconstructed from an interleaved diff; the exact split of
    banner lines between the public-brand and compat branches should be
    confirmed against the repository history.
    """
    normalized_program_name = normalize_program_name(program_name)
    if normalized_program_name == PUBLIC_PROGRAM_NAME:
        # Public brand prints a plain, agent-friendly banner.
        click.echo(f"Command: {PUBLIC_PROGRAM_NAME}")
        click.echo(f"Version: {__version__}")
        if skin.skill_path:
            click.echo(f"Skill: {skin.skill_path}")
        click.echo("Type help for commands, quit to exit")
        click.echo()
    else:
        # Compat brand keeps the legacy skinned banner.
        skin.print_banner()
    click.echo(f"History: {skin.history_file}")
|
||||
|
||||
|
||||
def print_repl_help(program_name: str | None = None) -> None:
    """Echo the REPL help text, branded for *program_name* when given.

    Bug fixed: the span held two conflicting definitions (a zero-argument
    one echoing the static REPL_HELP, shadowed by this parameterized one);
    only the parameterized, backward-compatible version is kept.
    """
    click.echo(repl_help_text(program_name).rstrip())
|
||||
|
||||
|
||||
def parse_history_limit(argv: Sequence[str]) -> int:
|
||||
@@ -272,7 +302,11 @@ def parse_history_limit(argv: Sequence[str]) -> int:
|
||||
raise RuntimeError(f"history limit must be an integer: {argv[1]}") from exc
|
||||
|
||||
|
||||
def handle_repl_builtin(argv: list[str], session: dict[str, object]) -> tuple[bool, int]:
|
||||
def handle_repl_builtin(
|
||||
argv: list[str],
|
||||
session: dict[str, object],
|
||||
program_name: str | None = None,
|
||||
) -> tuple[bool, int]:
|
||||
if not argv:
|
||||
return True, 0
|
||||
|
||||
@@ -280,7 +314,7 @@ def handle_repl_builtin(argv: list[str], session: dict[str, object]) -> tuple[bo
|
||||
if command in {"exit", "quit"}:
|
||||
return True, 1
|
||||
if command == "help":
|
||||
print_repl_help()
|
||||
print_repl_help(program_name)
|
||||
return True, 0
|
||||
if command == "current-doc":
|
||||
current_doc = session.get("current_doc")
|
||||
@@ -353,11 +387,11 @@ def handle_repl_builtin(argv: list[str], session: dict[str, object]) -> tuple[bo
|
||||
return False, 0
|
||||
|
||||
|
||||
def run_repl() -> int:
|
||||
def run_repl(program_name: str | None = None) -> int:
|
||||
session = load_session_state()
|
||||
skin = ReplSkin("mubu", version=__version__, history_file=str(session_state_dir() / "history.txt"))
|
||||
prompt_session = skin.create_prompt_session()
|
||||
print_repl_banner(skin)
|
||||
print_repl_banner(skin, program_name)
|
||||
if session.get("current_doc"):
|
||||
click.echo(f"Current doc: {session['current_doc']}")
|
||||
if session.get("current_node"):
|
||||
@@ -383,7 +417,7 @@ def run_repl() -> int:
|
||||
click.echo(f"parse error: {exc}", err=True)
|
||||
continue
|
||||
|
||||
handled, control = handle_repl_builtin(argv, session)
|
||||
handled, control = handle_repl_builtin(argv, session, program_name)
|
||||
if handled:
|
||||
if control == 1:
|
||||
skin.print_goodbye()
|
||||
@@ -405,8 +439,9 @@ def cli(ctx: click.Context, json_output: bool) -> int:
|
||||
"""Agent-native CLI for the Mubu desktop app with REPL and grouped command domains."""
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj["json_output"] = json_output
|
||||
ctx.obj["prog_name"] = normalize_program_name(ctx.info_name)
|
||||
if ctx.invoked_subcommand is None:
|
||||
return run_repl()
|
||||
return run_repl(ctx.obj["prog_name"])
|
||||
return 0
|
||||
|
||||
|
||||
@@ -668,10 +703,15 @@ def history_command(ctx: click.Context, limit: int, json_output: bool) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
@cli.command("repl", help=REPL_COMMAND_HELP)
@click.pass_context
def repl_command(ctx: click.Context) -> int:
    """Interactive REPL for the Mubu CLI.

    Bug fixed: the span held two conflicting definitions (a no-context one
    returning run_repl() with no branding, and this context-aware one);
    only the context-aware version is kept.
    """
    # Pull the normalized brand recorded by the root `cli` group, when
    # present, so the REPL banner/help match the invocation name.
    root = ctx.find_root()
    program_name = None
    if root is not None and root.obj is not None:
        program_name = root.obj.get("prog_name")
    return run_repl(program_name)
|
||||
|
||||
|
||||
def create_legacy_command(command_name: str, help_text: str) -> click.Command:
|
||||
@@ -688,10 +728,11 @@ for _command_name, _help_text in LEGACY_COMMANDS.items():
|
||||
cli.add_command(create_legacy_command(_command_name, _help_text))
|
||||
|
||||
|
||||
def dispatch(argv: list[str] | None = None) -> int:
|
||||
def dispatch(argv: list[str] | None = None, prog_name: str | None = None) -> int:
|
||||
args = list(sys.argv[1:] if argv is None else argv)
|
||||
normalized_prog_name = normalize_program_name(prog_name or sys.argv[0])
|
||||
try:
|
||||
result = cli.main(args=args, prog_name="cli-anything-mubu", standalone_mode=False)
|
||||
result = cli.main(args=args, prog_name=normalized_prog_name, standalone_mode=False)
|
||||
except click.exceptions.Exit as exc:
|
||||
return int(exc.exit_code)
|
||||
except click.ClickException as exc:
|
||||
@@ -701,7 +742,7 @@ def dispatch(argv: list[str] | None = None) -> int:
|
||||
|
||||
|
||||
def entrypoint(argv: list[str] | None = None) -> int:
    """Console-script entry point.

    Forwards sys.argv[0] as the program name so output branding follows
    whichever installed alias (mubu-cli / cli-anything-mubu) was invoked.
    Bug fixed: the span held two conflicting return statements from the
    interleaved diff; the prog_name-forwarding one is kept.
    """
    return dispatch(argv, prog_name=sys.argv[0])
|
||||
|
||||
|
||||
__all__ = [
|
||||
@@ -712,10 +753,12 @@ __all__ = [
|
||||
"default_session_state",
|
||||
"dispatch",
|
||||
"entrypoint",
|
||||
"normalize_program_name",
|
||||
"expand_repl_aliases",
|
||||
"expand_repl_aliases_with_state",
|
||||
"handle_repl_builtin",
|
||||
"load_session_state",
|
||||
"repl_help_text",
|
||||
"resolve_current_daily_doc_ref",
|
||||
"run_repl",
|
||||
"save_session_state",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import contextlib
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -5,8 +7,14 @@ import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from cli_anything.mubu.mubu_cli import expand_repl_aliases_with_state
|
||||
from cli_anything.mubu.mubu_cli import (
|
||||
dispatch,
|
||||
expand_repl_aliases_with_state,
|
||||
repl_help_text,
|
||||
session_state_dir,
|
||||
)
|
||||
from mubu_probe import (
|
||||
DEFAULT_BACKUP_ROOT,
|
||||
DEFAULT_STORAGE_ROOT,
|
||||
@@ -96,12 +104,49 @@ class CliEntrypointTests(unittest.TestCase):
|
||||
self.assertIn("create-child", result.stdout)
|
||||
self.assertIn("delete-node", result.stdout)
|
||||
|
||||
def test_dispatch_uses_public_prog_name_when_requested(self):
    """--help output must be branded with the public program name."""
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        exit_code = dispatch(["--help"], prog_name="mubu-cli")
    self.assertEqual(exit_code, 0)
    self.assertIn("Usage: mubu-cli", captured.getvalue())
|
||||
|
||||
def test_dispatch_uses_compat_prog_name_when_requested(self):
    """--help output must be branded with the legacy program name."""
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        exit_code = dispatch(["--help"], prog_name="cli-anything-mubu")
    self.assertEqual(exit_code, 0)
    self.assertIn("Usage: cli-anything-mubu", captured.getvalue())
|
||||
|
||||
def test_repl_help_renders(self):
    """`repl --help` must render the REPL help including builtins."""
    result = self.run_cli(["repl", "--help"])
    self.assertEqual(result.returncode, 0, msg=result.stderr)
    for expected in ("Interactive REPL", "use-node"):
        self.assertIn(expected, result.stdout)
|
||||
|
||||
def test_repl_help_text_supports_public_brand(self):
    """Help text rendered for the public brand mentions it."""
    rendered = repl_help_text("mubu-cli")
    self.assertIn("mubu-cli", rendered)
|
||||
|
||||
def test_session_state_dir_defaults_to_public_brand_path(self):
    """Fresh installs resolve state under ~/.config/mubu-cli."""
    with tempfile.TemporaryDirectory() as tmpdir:
        fake_home = Path(tmpdir)
        env_patch = mock.patch.dict(os.environ, {}, clear=False)
        home_patch = mock.patch(
            "cli_anything.mubu.mubu_cli.Path.home", return_value=fake_home
        )
        with env_patch, home_patch:
            self.assertEqual(
                session_state_dir(), fake_home / ".config" / "mubu-cli"
            )
|
||||
|
||||
def test_session_state_dir_falls_back_to_legacy_path_when_only_legacy_exists(self):
    """An existing legacy state dir wins when no public dir exists."""
    with tempfile.TemporaryDirectory() as tmpdir:
        fake_home = Path(tmpdir)
        legacy_dir = fake_home / ".config" / "cli-anything-mubu"
        legacy_dir.mkdir(parents=True)
        env_patch = mock.patch.dict(os.environ, {}, clear=False)
        home_patch = mock.patch(
            "cli_anything.mubu.mubu_cli.Path.home", return_value=fake_home
        )
        with env_patch, home_patch:
            self.assertEqual(session_state_dir(), legacy_dir)
|
||||
|
||||
def test_default_entrypoint_starts_repl_and_can_exit(self):
    """Running with no arguments drops into the REPL; 'exit' leaves cleanly."""
    completed = self.run_cli([], input_text="exit\n")
    self.assertEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import contextlib
|
||||
import io
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from mubu_probe import (
|
||||
build_api_headers,
|
||||
@@ -9,6 +12,7 @@ from mubu_probe import (
|
||||
build_delete_node_request,
|
||||
build_text_update_request,
|
||||
choose_current_daily_document,
|
||||
document_links,
|
||||
extract_doc_links,
|
||||
extract_plain_text,
|
||||
folder_documents,
|
||||
@@ -16,6 +20,7 @@ from mubu_probe import (
|
||||
list_document_nodes,
|
||||
load_latest_backups,
|
||||
looks_like_daily_title,
|
||||
main,
|
||||
node_path_to_api_path,
|
||||
normalize_document_meta_record,
|
||||
normalize_folder_record,
|
||||
@@ -182,14 +187,15 @@ class PathResolutionTests(unittest.TestCase):
|
||||
]
|
||||
self.document_metas = [
|
||||
{"doc_id": "docA", "folder_id": "dailyA", "title": "26.03.16", "updated_at": 20},
|
||||
{"doc_id": "docA2", "folder_id": "dailyA", "title": "26.03.16", "updated_at": 25},
|
||||
{"doc_id": "docB", "folder_id": "dailyA", "title": "26.3.15", "updated_at": 10},
|
||||
{"doc_id": "docC", "folder_id": "dailyB", "title": "26.03.16", "updated_at": 30},
|
||||
]
|
||||
self.backups = [
|
||||
{
|
||||
"doc_id": "docA",
|
||||
"title": "26.03.16",
|
||||
"backup_file": "/tmp/docA.json",
|
||||
"doc_id": "docA2",
|
||||
"title": "today root",
|
||||
"backup_file": "/tmp/docA2.json",
|
||||
"modified_at": 123.0,
|
||||
"data": {"viewType": "OUTLINE", "nodes": [{"id": "n1", "text": "<span>today</span>", "children": []}]},
|
||||
}
|
||||
@@ -199,7 +205,7 @@ class PathResolutionTests(unittest.TestCase):
|
||||
docs, folder, ambiguous = folder_documents(self.document_metas, self.folders, "Workspace/Daily tasks")
|
||||
self.assertEqual(ambiguous, [])
|
||||
self.assertEqual(folder["folder_id"], "dailyA")
|
||||
self.assertEqual([doc["doc_id"] for doc in docs], ["docA", "docB"])
|
||||
self.assertEqual([doc["doc_id"] for doc in docs], ["docA2", "docB"])
|
||||
self.assertEqual(docs[0]["doc_path"], "Workspace/Daily tasks/26.03.16")
|
||||
|
||||
def test_folder_documents_detects_ambiguous_folder_name(self):
|
||||
@@ -211,13 +217,56 @@ class PathResolutionTests(unittest.TestCase):
|
||||
def test_resolve_document_reference_supports_full_doc_path(self):
|
||||
doc, ambiguous = resolve_document_reference(self.document_metas, self.folders, "Workspace/Daily tasks/26.03.16")
|
||||
self.assertEqual(ambiguous, [])
|
||||
self.assertEqual(doc["doc_id"], "docA")
|
||||
self.assertEqual(doc["doc_id"], "docA2")
|
||||
self.assertEqual(doc["doc_path"], "Workspace/Daily tasks/26.03.16")
|
||||
|
||||
def test_resolve_document_reference_detects_ambiguous_title(self):
|
||||
doc, ambiguous = resolve_document_reference(self.document_metas, self.folders, "26.03.16")
|
||||
self.assertIsNone(doc)
|
||||
self.assertEqual(len(ambiguous), 2)
|
||||
self.assertEqual({item["doc_id"] for item in ambiguous}, {"docA2", "docC"})
|
||||
|
||||
def test_resolve_document_reference_collapses_same_path_duplicates_for_title(self):
    """Two doc_ids behind one logical path resolve to the freshest record."""
    folders = [
        {"folder_id": "rootA", "name": "Workspace", "parent_id": "0"},
        {"folder_id": "dailyA", "name": "Daily tasks", "parent_id": "rootA"},
    ]
    metas = [
        {"doc_id": "old", "folder_id": "dailyA", "title": "26.03.18", "updated_at": 10},
        {"doc_id": "new", "folder_id": "dailyA", "title": "26.03.18", "updated_at": 20},
    ]

    resolved, ambiguous = resolve_document_reference(metas, folders, "26.03.18")

    self.assertEqual(ambiguous, [])
    self.assertEqual(resolved["doc_id"], "new")
|
||||
|
||||
def test_resolve_document_reference_prefers_newer_timestamp_over_higher_revision_across_doc_ids(self):
    """Timestamp freshness outranks RxDB revision generation when deduping."""
    folders = [
        {"folder_id": "rootA", "name": "Workspace", "parent_id": "0"},
        {"folder_id": "dailyA", "name": "Daily tasks", "parent_id": "rootA"},
    ]
    stale_high_rev = {
        "doc_id": "old-high-rev",
        "folder_id": "dailyA",
        "title": "26.03.19",
        "updated_at": 10,
        "_rev": "999-older",
    }
    fresh_low_rev = {
        "doc_id": "new-low-rev",
        "folder_id": "dailyA",
        "title": "26.03.19",
        "updated_at": 20,
        "_rev": "1-newer",
    }

    resolved, ambiguous = resolve_document_reference(
        [stale_high_rev, fresh_low_rev], folders, "Workspace/Daily tasks/26.03.19"
    )

    self.assertEqual(ambiguous, [])
    self.assertEqual(resolved["doc_id"], "new-low-rev")
|
||||
|
||||
def test_show_document_by_reference_uses_resolved_path(self):
|
||||
payload, ambiguous = show_document_by_reference(
|
||||
@@ -227,13 +276,77 @@ class PathResolutionTests(unittest.TestCase):
|
||||
"Workspace/Daily tasks/26.03.16",
|
||||
)
|
||||
self.assertEqual(ambiguous, [])
|
||||
self.assertEqual(payload["doc_id"], "docA")
|
||||
self.assertEqual(payload["doc_id"], "docA2")
|
||||
self.assertEqual(payload["title"], "26.03.16")
|
||||
self.assertEqual(payload["folder_path"], "Workspace/Daily tasks")
|
||||
self.assertEqual(payload["doc_path"], "Workspace/Daily tasks/26.03.16")
|
||||
self.assertEqual(payload["nodes"][0]["text"], "today")
|
||||
|
||||
|
||||
class DocumentMetadataOverlayTests(unittest.TestCase):
    """Backup payloads should be overlaid with titles/paths from RxDB metadata."""

    def test_document_links_prefers_metadata_title_for_source_document(self):
        # The anchor's doc-mention markup is what document_links parses.
        anchor_html = (
            '<a class="mention mm-iconfont" '
            'href="https://mubu.com/docdoc-target-1" '
            'data-token="doc-target-1">Target Doc</a>'
        )
        backups = [
            {
                "doc_id": "docA",
                "title": "root node title",
                "data": {
                    "nodes": [{"id": "n1", "text": anchor_html, "children": []}]
                },
            }
        ]

        links = document_links(
            backups,
            "docA",
            title_lookup={"docA": "26.03.18", "doc-target-1": "Target Doc"},
        )

        self.assertEqual(len(links), 1)
        self.assertEqual(links[0]["source_doc_title"], "26.03.18")

    def test_show_command_prefers_metadata_title_and_path_when_available(self):
        backups = [
            {
                "doc_id": "docA",
                "title": "root node title",
                "backup_file": "/tmp/docA.json",
                "modified_at": 123.0,
                "data": {
                    "viewType": "OUTLINE",
                    "nodes": [{"id": "n1", "text": "<span>today</span>", "children": []}],
                },
            }
        ]
        metas = [
            {"doc_id": "docA", "folder_id": "dailyA", "title": "26.03.18", "updated_at": 20}
        ]
        folders = [
            {"folder_id": "rootA", "name": "Workspace", "parent_id": "0"},
            {"folder_id": "dailyA", "name": "Daily tasks", "parent_id": "rootA"},
        ]

        captured = io.StringIO()
        with (
            mock.patch("mubu_probe.load_latest_backups", return_value=backups),
            mock.patch("mubu_probe.load_document_metas", return_value=metas),
            mock.patch("mubu_probe.load_folders", return_value=folders),
            contextlib.redirect_stdout(captured),
        ):
            exit_code = main(["show", "docA", "--json"])

        self.assertEqual(exit_code, 0)
        payload = json.loads(captured.getvalue())
        self.assertEqual(payload["title"], "26.03.18")
        self.assertEqual(payload["folder_path"], "Workspace/Daily tasks")
        self.assertEqual(payload["doc_path"], "Workspace/Daily tasks/26.03.18")
|
||||
|
||||
|
||||
class DocumentNodeListingTests(unittest.TestCase):
|
||||
def test_list_document_nodes_flattens_tree_for_agent_targeting(self):
|
||||
data = {
|
||||
@@ -299,6 +412,8 @@ class DailySelectionTests(unittest.TestCase):
|
||||
def test_looks_like_daily_title_accepts_date_titles_and_rejects_templates(self):
|
||||
self.assertTrue(looks_like_daily_title("26.03.16"))
|
||||
self.assertTrue(looks_like_daily_title("26.3.8-3.9"))
|
||||
self.assertTrue(looks_like_daily_title("2026-03-18"))
|
||||
self.assertTrue(looks_like_daily_title("2026年3月18日"))
|
||||
self.assertFalse(looks_like_daily_title("DDL表"))
|
||||
self.assertFalse(looks_like_daily_title("26.2.22模板更新"))
|
||||
|
||||
@@ -314,6 +429,17 @@ class DailySelectionTests(unittest.TestCase):
|
||||
self.assertEqual(selected["doc_id"], "today")
|
||||
self.assertEqual([item["doc_id"] for item in candidates], ["today", "yesterday"])
|
||||
|
||||
def test_choose_current_daily_document_accepts_full_year_and_cn_date_titles(self):
    """ISO and Chinese date titles count as daily docs; other titles do not."""
    docs = [
        {"doc_id": "older", "title": "2026年3月17日", "updated_at": 90},
        {"doc_id": "latest", "title": "2026-03-18", "updated_at": 120},
        {"doc_id": "other", "title": "项目看板", "updated_at": 130},
    ]

    selected, candidates = choose_current_daily_document(docs)

    self.assertEqual(selected["doc_id"], "latest")
    self.assertEqual(
        [entry["doc_id"] for entry in candidates], ["latest", "older"]
    )
||||
|
||||
def test_choose_current_daily_document_can_fallback_to_any_title(self):
|
||||
docs = [
|
||||
{"doc_id": "ddl", "title": "DDL表", "updated_at": 100},
|
||||
|
||||
@@ -83,8 +83,15 @@ ANCHOR_RE = re.compile(r"<a\b(?P<attrs>[^>]*)>(?P<label>.*?)</a>", re.IGNORECASE
|
||||
TOKEN_ATTR_RE = re.compile(r'data-token="(?P<token>[^"]+)"')
|
||||
HREF_DOC_RE = re.compile(r'href="https://mubu\.com/doc(?P<token>[^"?#/]+)"', re.IGNORECASE)
|
||||
NODE_ID_ALPHABET = string.ascii_letters + string.digits
|
||||
DAILY_TITLE_RE = re.compile(r"^\d{2}\.\d{1,2}\.\d{1,2}(?:-\d{1,2}(?:\.\d{1,2})?)?")
|
||||
DAILY_TITLE_PATTERNS = (
|
||||
re.compile(r"^\d{2}\.\d{1,2}\.\d{1,2}(?:-\d{1,2}(?:\.\d{1,2})?)?$"),
|
||||
re.compile(r"^\d{4}[./-]\d{1,2}[./-]\d{1,2}$"),
|
||||
re.compile(r"^\d{4}年\d{1,2}月\d{1,2}日$"),
|
||||
re.compile(r"^\d{1,2}[./-]\d{1,2}$"),
|
||||
re.compile(r"^\d{1,2}月\d{1,2}日$"),
|
||||
)
|
||||
DEFAULT_DAILY_EXCLUDE_KEYWORDS = ("模板", "template")
|
||||
DEFAULT_DAILY_FOLDER_KEYWORDS = ("daily", "diary", "journal", "日记", "日志", "每日", "每天", "日常")
|
||||
|
||||
|
||||
def configured_daily_folder_ref(env: Mapping[str, str] | None = None) -> str | None:
|
||||
@@ -411,6 +418,37 @@ def enrich_document_meta(
|
||||
}
|
||||
|
||||
|
||||
def document_meta_sort_key(meta: dict[str, Any]) -> tuple[int, int, str]:
    """Ordering key for document metadata records.

    Sorts by the freshest of updated/created/modified timestamps, then by
    the RxDB revision generation, with doc_id as a stable tie-breaker.
    """
    freshest = max(
        numeric_values(
            meta.get("updated_at"),
            meta.get("created_at"),
            meta.get("modified_at"),
        ),
        default=0,
    )
    revision = parse_revision_generation(meta.get("_rev") or meta.get("rev"))
    return freshest, revision, str(meta.get("doc_id") or "")
|
||||
|
||||
|
||||
def dedupe_document_metas_by_logical_path(
    document_metas: Iterable[dict[str, Any]],
    folder_paths: dict[str, str],
) -> list[dict[str, Any]]:
    """Collapse metadata records that share one logical document path.

    Mubu history can leave several doc_ids behind the same folder/title
    path; keep only the freshest record per path (per
    document_meta_sort_key). Records without a resolvable path are keyed
    by their own doc_id so they never merge with one another.
    """
    winners: dict[str, dict[str, Any]] = {}
    for raw_meta in document_metas:
        enriched = enrich_document_meta(raw_meta, folder_paths)
        key = normalized_lookup_key(enriched.get("doc_path"))
        if not key:
            key = f"doc:{normalized_lookup_key(enriched.get('doc_id'))}"
        incumbent = winners.get(key)
        # Later records win ties (>=), matching the original behavior.
        if incumbent is None or document_meta_sort_key(enriched) >= document_meta_sort_key(incumbent):
            winners[key] = enriched
    return list(winners.values())
|
||||
|
||||
|
||||
def folder_documents(
|
||||
document_metas: Iterable[dict[str, Any]],
|
||||
folders: Iterable[dict[str, Any]],
|
||||
@@ -422,14 +460,30 @@ def folder_documents(
|
||||
return [], None, ambiguous
|
||||
|
||||
docs = [
|
||||
enrich_document_meta(meta, folder_paths)
|
||||
for meta in document_metas
|
||||
meta
|
||||
for meta in dedupe_document_metas_by_logical_path(document_metas, folder_paths)
|
||||
if meta.get("folder_id") == folder.get("folder_id")
|
||||
]
|
||||
docs.sort(key=lambda item: item.get("updated_at") or 0, reverse=True)
|
||||
docs.sort(key=document_meta_sort_key, reverse=True)
|
||||
return docs, {**folder, "path": folder_paths.get(folder["folder_id"], "")}, []
|
||||
|
||||
|
||||
def document_meta_by_id(
    document_metas: Iterable[dict[str, Any]],
    folders: Iterable[dict[str, Any]],
    doc_id: str,
) -> dict[str, Any] | None:
    """Return the freshest enriched metadata record for *doc_id*, or None."""
    _, folder_paths = build_folder_indexes(folders)
    candidates = []
    for meta in document_metas:
        if meta.get("doc_id") == doc_id:
            candidates.append(enrich_document_meta(meta, folder_paths))
    if not candidates:
        return None
    return max(candidates, key=document_meta_sort_key)
|
||||
|
||||
|
||||
def iter_nodes(nodes: Iterable[dict[str, Any]], path: tuple[int, ...] = ()) -> Iterable[tuple[tuple[int, ...], dict[str, Any]]]:
|
||||
for index, node in enumerate(nodes):
|
||||
current_path = path + (index,)
|
||||
@@ -665,12 +719,22 @@ def looks_like_daily_title(
|
||||
title = title.strip()
|
||||
if not title:
|
||||
return False
|
||||
if not DAILY_TITLE_RE.match(title):
|
||||
if not any(pattern.match(title) for pattern in DAILY_TITLE_PATTERNS):
|
||||
return False
|
||||
lowered = title.casefold()
|
||||
return not any(keyword.casefold() in lowered for keyword in exclude_keywords)
|
||||
|
||||
|
||||
def looks_like_daily_folder_name(
    name: str | None,
    keywords: Iterable[str] = DEFAULT_DAILY_FOLDER_KEYWORDS,
) -> bool:
    """True when *name* contains any daily-folder keyword (case-folded)."""
    haystack = normalized_lookup_key(name)
    if not haystack:
        return False
    for keyword in keywords:
        if keyword.casefold() in haystack:
            return True
    return False
|
||||
|
||||
|
||||
def choose_current_daily_document(
|
||||
docs: Iterable[dict[str, Any]],
|
||||
allow_non_daily_titles: bool = False,
|
||||
@@ -947,8 +1011,8 @@ def resolve_document_reference(
|
||||
folders: Iterable[dict[str, Any]],
|
||||
doc_ref: str,
|
||||
) -> tuple[dict[str, Any] | None, list[dict[str, Any]]]:
|
||||
folder_by_id, folder_paths = build_folder_indexes(folders)
|
||||
metas = [enrich_document_meta(meta, folder_paths) for meta in document_metas]
|
||||
_, folder_paths = build_folder_indexes(folders)
|
||||
metas = dedupe_document_metas_by_logical_path(document_metas, folder_paths)
|
||||
|
||||
by_id = [meta for meta in metas if meta.get("doc_id") == doc_ref]
|
||||
if len(by_id) == 1:
|
||||
@@ -1020,7 +1084,7 @@ def document_links(
|
||||
links.append(
|
||||
{
|
||||
"source_doc_id": doc_id,
|
||||
"source_doc_title": document.get("title"),
|
||||
"source_doc_title": title_lookup.get(doc_id) or document.get("title"),
|
||||
"source_node_id": node.get("id"),
|
||||
"source_path": list(path),
|
||||
"source_field": field,
|
||||
@@ -1485,7 +1549,11 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
|
||||
daily_parser = subparsers.add_parser("daily", help="Find Daily-style folders and list the documents inside them.")
|
||||
daily_parser.add_argument("--storage-root", type=Path, default=DEFAULT_STORAGE_ROOT)
|
||||
daily_parser.add_argument("--query", default="daily")
|
||||
daily_parser.add_argument(
|
||||
"--query",
|
||||
default=None,
|
||||
help="Optional folder-name substring filter. Defaults to built-in daily-folder heuristics.",
|
||||
)
|
||||
daily_parser.add_argument("--limit", type=int, default=50)
|
||||
daily_parser.add_argument("--json", action="store_true")
|
||||
|
||||
@@ -1605,7 +1673,17 @@ def main(argv: list[str] | None = None) -> int:
|
||||
|
||||
if args.command == "show":
|
||||
documents = load_latest_backups(args.root)
|
||||
payload = show_document(documents, args.doc_id, max_depth=args.max_depth)
|
||||
metas = load_document_metas(DEFAULT_STORAGE_ROOT)
|
||||
folders = load_folders(DEFAULT_STORAGE_ROOT)
|
||||
meta = document_meta_by_id(metas, folders, args.doc_id)
|
||||
payload = show_document(
|
||||
documents,
|
||||
args.doc_id,
|
||||
max_depth=args.max_depth,
|
||||
title_override=meta.get("title") if meta else None,
|
||||
folder_path=meta.get("folder_path") if meta else None,
|
||||
doc_path=meta.get("doc_path") if meta else None,
|
||||
)
|
||||
if payload is None:
|
||||
parser.error(f"document not found: {args.doc_id}")
|
||||
dump_output(payload, args.json)
|
||||
@@ -1639,14 +1717,11 @@ def main(argv: list[str] | None = None) -> int:
|
||||
folders = load_folders(args.storage_root)
|
||||
_, folder_paths = build_folder_indexes(folders)
|
||||
payload = [
|
||||
{
|
||||
**meta,
|
||||
"folder_path": folder_paths.get(meta.get("folder_id", ""), ""),
|
||||
}
|
||||
for meta in metas
|
||||
meta
|
||||
for meta in dedupe_document_metas_by_logical_path(metas, folder_paths)
|
||||
if meta.get("folder_id") == args.folder_id
|
||||
]
|
||||
payload.sort(key=lambda item: item.get("updated_at") or 0, reverse=True)
|
||||
payload.sort(key=document_meta_sort_key, reverse=True)
|
||||
dump_output(payload[: args.limit], args.json)
|
||||
return 0
|
||||
|
||||
@@ -1692,21 +1767,33 @@ def main(argv: list[str] | None = None) -> int:
|
||||
folders = load_folders(args.storage_root)
|
||||
metas = load_document_metas(args.storage_root)
|
||||
_, folder_paths = build_folder_indexes(folders)
|
||||
matched_folders = [
|
||||
folder
|
||||
for folder in folders
|
||||
if args.query.lower() in (folder.get("name") or "").lower()
|
||||
]
|
||||
logical_metas = dedupe_document_metas_by_logical_path(metas, folder_paths)
|
||||
docs_by_folder: dict[str, list[dict[str, Any]]] = {}
|
||||
for meta in logical_metas:
|
||||
folder_id = meta.get("folder_id")
|
||||
if isinstance(folder_id, str):
|
||||
docs_by_folder.setdefault(folder_id, []).append(meta)
|
||||
if args.query:
|
||||
query = normalized_lookup_key(args.query)
|
||||
matched_folders = [
|
||||
folder
|
||||
for folder in folders
|
||||
if query in normalized_lookup_key(folder.get("name"))
|
||||
]
|
||||
else:
|
||||
matched_folders = [
|
||||
folder
|
||||
for folder in folders
|
||||
if looks_like_daily_folder_name(folder.get("name"))
|
||||
or choose_current_daily_document(docs_by_folder.get(folder.get("folder_id"), []))[0] is not None
|
||||
]
|
||||
matched_ids = {folder["folder_id"] for folder in matched_folders}
|
||||
docs = [
|
||||
{
|
||||
**meta,
|
||||
"folder_path": folder_paths.get(meta.get("folder_id", ""), ""),
|
||||
}
|
||||
for meta in metas
|
||||
meta
|
||||
for meta in logical_metas
|
||||
if meta.get("folder_id") in matched_ids
|
||||
]
|
||||
docs.sort(key=lambda item: item.get("updated_at") or 0, reverse=True)
|
||||
docs.sort(key=document_meta_sort_key, reverse=True)
|
||||
payload = {
|
||||
"folders": [
|
||||
{**folder, "path": folder_paths.get(folder["folder_id"], "")}
|
||||
|
||||
Reference in New Issue
Block a user