From 8cdbafe301a9f4ee81c9e4b3bc2777ff56d316d3 Mon Sep 17 00:00:00 2001
From: haorui-harry <2224882012@qq.com>
Date: Mon, 4 May 2026 18:21:42 +0800
Subject: [PATCH] fix(macrocli): address remaining review blockers

- Remove duplicate macro_record implementation in macrocli_cli.py
  (a stale second copy of the function body at lines 582-706 that ran
  after the first one, recording and saving the macro a second time)
- Update MACROCLI.md: document visual_anchor and gui_agent backends,
  optional dependencies ([visual], [gui_agent]), and OpenAI-compatible
  LLM configuration (MACROCLI_MODEL/API_KEY/BASE_URL)
- Resolve .gitignore conflicts: add sbox and quietshrink directories
  from upstream while preserving macrocli entries (Step 4/5/6 pattern)
---
 .gitignore                                    |   8 ++
 macrocli/agent-harness/MACROCLI.md            |  35 ++++-
 .../cli_anything/macrocli/macrocli_cli.py     | 125 ------------------
 3 files changed, 38 insertions(+), 130 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7c42c6118..f1d134515 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,6 +83,8 @@
 !/nsight-graphics/
 !/lldb/
 !/macrocli/
+!/sbox/
+!/quietshrink/
 
 # Step 5: Inside each software dir, ignore everything (including dotfiles)
 /gimp/*
@@ -173,6 +175,10 @@
 /lldb/.*
 /macrocli/*
 /macrocli/.*
+/sbox/*
+/sbox/.*
+/quietshrink/*
+/quietshrink/.*
 
 # Step 6: ...except agent-harness/
 !/gimp/agent-harness/
@@ -225,6 +231,8 @@
 !/nsight-graphics/agent-harness/
 !/lldb/agent-harness/
 !/macrocli/agent-harness/
+!/sbox/agent-harness/
+!/quietshrink/agent-harness/
 
 # Exclude non-gedit demo macros from macrocli (local only)
 /macrocli/agent-harness/cli_anything/macrocli/macro_definitions/demo/flameshot*
diff --git a/macrocli/agent-harness/MACROCLI.md b/macrocli/agent-harness/MACROCLI.md
index e2c684c68..38a22f2d1 100644
--- a/macrocli/agent-harness/MACROCLI.md
+++ b/macrocli/agent-harness/MACROCLI.md
@@ -40,7 +40,7 @@ Agent
 | L5 | Macro Execution Runtime | `core/runtime.py` |
 | L4 | Parameterized Macro Model | `core/macro_model.py` + `macro_definitions/*.yaml` |
 | L3 | Backend Routing Engine | `core/routing.py` |
-| L2 | Execution Backends | `backends/` (5 backends) |
+| L2 | Execution Backends | `backends/` (7 backends) |
 | L1 | Target Application | Any GUI-first or closed-source app |
 
 ## Execution Backends
@@ -49,7 +49,9 @@ Agent
 |---------|----------|---------|----------|
 | `native_api` | 100 | `backend: native_api` | subprocess / shell commands |
 | `gui_macro` | 80 | `backend: gui_macro` | precompiled coordinate replay (pyautogui) |
+| `visual_anchor` | 75 | `backend: visual_anchor` | template-matching click/type (requires `[visual]`) |
 | `file_transform` | 70 | `backend: file_transform` | XML, JSON, text file editing |
+| `gui_agent` | 60 | `backend: gui_agent` | vision-model-driven automation (requires `[gui_agent]`) |
 | `semantic_ui` | 50 | `backend: semantic_ui` | accessibility API + keyboard (xdotool) |
 | `recovery` | 10 | `backend: recovery` | retry + fallback orchestration |
 
@@ -135,8 +137,10 @@ macrocli/
         │   ├── native_api.py             subprocess backend
         │   ├── file_transform.py         XML/JSON/text backend
         │   ├── semantic_ui.py            accessibility backend
+        │   ├── visual_anchor.py          template-matching backend
+        │   ├── gui_agent.py              vision-model automation backend
         │   ├── gui_macro.py              compiled replay backend
-        │   └── recovery.py              retry/fallback backend
+        │   └── recovery.py               retry/fallback backend
         ├── skills/SKILL.md               Agent-readable skill definition
         ├── utils/repl_skin.py            Unified REPL skin (cli-anything standard)
         └── tests/
@@ -153,7 +157,26 @@ pip install -e .
 
 **Runtime dependencies:** Python 3.10+, PyYAML, click, prompt-toolkit.
 
-**Optional (for specific backends):**
+**Optional extras:**
+
+```bash
+pip install -e ".[visual]"      # visual_anchor backend (mss, Pillow, numpy, pynput)
+pip install -e ".[gui_agent]"   # gui_agent backend     (openai, mss, Pillow)
+pip install -e ".[all]"         # everything
+```
+
+**gui_agent backend configuration:**
+
+The `gui_agent` backend uses the OpenAI SDK and is compatible with any
+OpenAI-compatible API. Configure via environment variables:
+
+| Variable            | Description                                 |
+|---------------------|---------------------------------------------|
+| `MACROCLI_MODEL`    | Model name (required, e.g. `gpt-4o`)        |
+| `MACROCLI_API_KEY`  | API key for the provider                    |
+| `MACROCLI_BASE_URL` | Base URL (only needed for non-OpenAI hosts) |
+
+**Other optional dependencies:**
 - `xdotool` — semantic_ui backend on Linux
 - `pyautogui` — gui_macro backend
 - `psutil` — richer process_running checks
@@ -172,9 +195,11 @@ python3 -m pytest cli_anything/macrocli/tests/ -v -s
 running code, inspectable via `macro info`, and editable without touching the
 harness source.
 
-**Why 5 backends?** Real GUI applications expose many different control
+**Why 7 backends?** Real GUI applications expose many different control
 surfaces. The routing engine picks the most reliable one available — the agent
-doesn't need to know which one ran.
+doesn't need to know which one ran. The `visual_anchor` backend uses template
+matching for robust UI element detection, while `gui_agent` uses vision models
+for dynamic decision-making when the UI state is unpredictable.
 
 **Why preconditions and postconditions?** Agents operate in environments where
 state is uncertain. Failing loudly before execution (preconditions) and
diff --git a/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py b/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py
index bd27e7de6..87648187e 100644
--- a/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py
+++ b/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py
@@ -579,131 +579,6 @@ def macro_record(name, output_dir, timeout, do_agent_review,
                 if parameters else ""
             )
         )
-    """Record GUI interactions and generate a macro YAML.
-
-    \b
-    Start recording, perform your GUI workflow, then press Ctrl+Alt+S to stop.
-    Each click is anchored to the focused window (robust to window movement).
-
-    \b
-    Examples:
-      # Basic recording
-      macro record my_export
-
-      # Record + interactively parameterize typed values
-      macro record my_export --parameterize
-
-      # Record + auto-parameterize with LLM
-      macro record my_export --auto-parameterize --api-key $MACROCLI_API_KEY
-
-    Requires: pip install mss Pillow pynput
-    """
-    try:
-        from cli_anything.macrocli.core.recorder import MacroRecorder
-    except ImportError as e:
-        click.echo(f"Error: {e}", err=True)
-        sys.exit(1)
-
-    if do_parameterize and do_auto_param:
-        click.echo(
-            "Error: --parameterize and --auto-parameterize are mutually exclusive.",
-            err=True,
-        )
-        sys.exit(1)
-
-    recorder = MacroRecorder(macro_name=name, output_dir=output_dir)
-
-    if not _json_output:
-        click.echo(f"Recording '{name}'. Press Ctrl+Alt+S to stop...")
-
-    try:
-        recorder.record(timeout_s=timeout if timeout > 0 else None)
-    except Exception as e:
-        if _json_output:
-            output({"error": str(e), "success": False})
-        else:
-            click.echo(f"Error during recording: {e}", err=True)
-        if not _repl_mode:
-            sys.exit(1)
-        return
-
-    # ── Parameterization phase ────────────────────────────────────────────────
-    parameters = None
-    type_steps = recorder.get_type_steps()
-
-    if do_auto_param and type_steps:
-        try:
-            from cli_anything.macrocli.core.parameterize import (
-                llm_suggest_parameters,
-                interactive_parameterize,
-            )
-            if not _json_output:
-                click.echo(f"\nAsking LLM to suggest parameters for "
-                           f"{len(type_steps)} type_text step(s)...")
-            suggestions = llm_suggest_parameters(
-                type_steps, api_key=api_key
-            )
-            if suggestions and not _json_output:
-                click.echo("  LLM suggestions:")
-                for idx, pname in suggestions.items():
-                    step = recorder._steps[idx]
-                    click.echo(f"    step {idx+1} {step.text!r} → ${{{pname}}}")
-                click.echo()
-                # Still allow user to confirm/adjust
-                confirmed = interactive_parameterize(
-                    [(i, s) for i, s in type_steps if i in suggestions],
-                    existing_params=set(),
-                )
-                # For steps LLM suggested but user skipped, remove them
-                final = {i: n for i, n in suggestions.items() if i in confirmed}
-                # For steps user renamed, use their name
-                final.update(confirmed)
-                parameters = recorder.apply_parameterization(final)
-            elif not suggestions and not _json_output:
-                click.echo("  LLM found no values to parameterize.")
-        except Exception as e:
-            click.echo(f"  Warning: LLM parameterization failed: {e}", err=True)
-            click.echo("  Falling back to interactive mode...")
-            do_parameterize = True
-
-    if do_parameterize and type_steps:
-        from cli_anything.macrocli.core.parameterize import interactive_parameterize
-        assignments = interactive_parameterize(type_steps)
-        if assignments:
-            parameters = recorder.apply_parameterization(assignments)
-
-    try:
-        yaml_path = recorder.save(parameters=parameters)
-    except Exception as e:
-        if _json_output:
-            output({"error": str(e), "success": False})
-        else:
-            click.echo(f"Error saving macro: {e}", err=True)
-        if not _repl_mode:
-            sys.exit(1)
-        return
-
-    if _json_output:
-        output({
-            "success": True,
-            "yaml_path": yaml_path,
-            "steps": len(recorder._steps),
-            "parameters": list((parameters or {}).keys()),
-            "templates_dir": str(recorder.templates_dir),
-        })
-    else:
-        click.echo(f"✓ Saved {len(recorder._steps)} steps to: {yaml_path}")
-        if parameters:
-            click.echo(f"  Parameters: {', '.join(parameters.keys())}")
-        click.echo(f"  Templates: {recorder.templates_dir}/")
-        click.echo(
-            "\n  Run it with:\n"
-            f"  macro run {name} --macro-file {yaml_path}"
-            + (
-                "".join(f" --param {k}=<value>" for k in (parameters or {}))
-                if parameters else ""
-            )
-        )
 
 
 @macro.command("parameterize")