From 8cdbafe301a9f4ee81c9e4b3bc2777ff56d316d3 Mon Sep 17 00:00:00 2001 From: haorui-harry <2224882012@qq.com> Date: Mon, 4 May 2026 18:21:42 +0800 Subject: [PATCH] fix(macrocli): address remaining review blockers - Remove duplicate macro_record implementation in macrocli_cli.py (stale second implementation from lines 582-706 that would cause duplicate saves and unexpected behavior) - Update MACROCLI.md: document visual_anchor and gui_agent backends, optional dependencies ([visual], [gui_agent]), and OpenAI-compatible LLM configuration (MACROCLI_MODEL/API_KEY/BASE_URL) - Resolve .gitignore conflicts: add sbox and quietshrink directories from upstream while preserving macrocli entries (Step 4/5/6 pattern) --- .gitignore | 8 ++ macrocli/agent-harness/MACROCLI.md | 35 ++++- .../cli_anything/macrocli/macrocli_cli.py | 125 ------------------ 3 files changed, 38 insertions(+), 130 deletions(-) diff --git a/.gitignore b/.gitignore index 7c42c6118..f1d134515 100644 --- a/.gitignore +++ b/.gitignore @@ -83,6 +83,8 @@ !/nsight-graphics/ !/lldb/ !/macrocli/ +!/sbox/ +!/quietshrink/ # Step 5: Inside each software dir, ignore everything (including dotfiles) /gimp/* @@ -173,6 +175,10 @@ /lldb/.* /macrocli/* /macrocli/.* +/sbox/* +/sbox/.* +/quietshrink/* +/quietshrink/.* # Step 6: ...except agent-harness/ !/gimp/agent-harness/ @@ -225,6 +231,8 @@ !/nsight-graphics/agent-harness/ !/lldb/agent-harness/ !/macrocli/agent-harness/ +!/sbox/agent-harness/ +!/quietshrink/agent-harness/ # Exclude non-gedit demo macros from macrocli (local only) /macrocli/agent-harness/cli_anything/macrocli/macro_definitions/demo/flameshot* diff --git a/macrocli/agent-harness/MACROCLI.md b/macrocli/agent-harness/MACROCLI.md index e2c684c68..38a22f2d1 100644 --- a/macrocli/agent-harness/MACROCLI.md +++ b/macrocli/agent-harness/MACROCLI.md @@ -40,7 +40,7 @@ Agent | L5 | Macro Execution Runtime | `core/runtime.py` | | L4 | Parameterized Macro Model | `core/macro_model.py` + `macro_definitions/*.yaml` | | L3 | Backend Routing Engine | `core/routing.py` | -| L2 | Execution Backends | `backends/` (5 backends) | +| L2 | Execution Backends | `backends/` (7 backends) | | L1 | Target Application | Any GUI-first or closed-source app | ## Execution Backends @@ -49,7 +49,9 @@ Agent |---------|----------|---------|----------| | `native_api` | 100 | `backend: native_api` | subprocess / shell commands | | `gui_macro` | 80 | `backend: gui_macro` | precompiled coordinate replay (pyautogui) | +| `visual_anchor` | 75 | `backend: visual_anchor` | template-matching click/type (requires `[visual]`) | | `file_transform` | 70 | `backend: file_transform` | XML, JSON, text file editing | +| `gui_agent` | 60 | `backend: gui_agent` | vision-model-driven automation (requires `[gui_agent]`) | | `semantic_ui` | 50 | `backend: semantic_ui` | accessibility API + keyboard (xdotool) | | `recovery` | 10 | `backend: recovery` | retry + fallback orchestration | @@ -135,8 +137,10 @@ macrocli/ │ ├── native_api.py subprocess backend │ ├── file_transform.py XML/JSON/text backend │ ├── semantic_ui.py accessibility backend + │ ├── visual_anchor.py template-matching backend + │ ├── gui_agent.py vision-model automation backend │ ├── gui_macro.py compiled replay backend - │ └── recovery.py retry/fallback backend + │ └── recovery.py retry/fallback backend ├── skills/SKILL.md Agent-readable skill definition ├── utils/repl_skin.py Unified REPL skin (cli-anything standard) └── tests/ @@ -153,7 +157,26 @@ pip install -e . **Runtime dependencies:** Python 3.10+, PyYAML, click, prompt-toolkit. -**Optional (for specific backends):** +**Optional extras:** + +```bash +pip install -e ".[visual]" # visual_anchor backend (mss, Pillow, numpy, pynput) +pip install -e ".[gui_agent]" # gui_agent backend (openai, mss, Pillow) +pip install -e ".[all]" # everything +``` + +**gui_agent backend configuration:** + +The `gui_agent` backend uses the OpenAI SDK and is compatible with any +OpenAI-compatible API. Configure via environment variables: + +| Variable | Description | +|--------------------|---------------------------------------------| +| `MACROCLI_MODEL` | Model name (required, e.g. `gpt-4o`) | +| `MACROCLI_API_KEY` | API key for the provider | +| `MACROCLI_BASE_URL`| Base URL (only needed for non-OpenAI hosts) | + +**Other optional dependencies:** - `xdotool` — semantic_ui backend on Linux - `pyautogui` — gui_macro backend - `psutil` — richer process_running checks @@ -172,9 +195,11 @@ python3 -m pytest cli_anything/macrocli/tests/ -v -s running code, inspectable via `macro info`, and editable without touching the harness source. -**Why 5 backends?** Real GUI applications expose many different control +**Why 7 backends?** Real GUI applications expose many different control surfaces. The routing engine picks the most reliable one available — the agent -doesn't need to know which one ran. +doesn't need to know which one ran. The `visual_anchor` backend uses template +matching for robust UI element detection, while `gui_agent` uses vision models +for dynamic decision-making when the UI state is unpredictable. **Why preconditions and postconditions?** Agents operate in environments where state is uncertain. Failing loudly before execution (preconditions) and diff --git a/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py b/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py index bd27e7de6..87648187e 100644 --- a/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py +++ b/macrocli/agent-harness/cli_anything/macrocli/macrocli_cli.py @@ -579,131 +579,6 @@ def macro_record(name, output_dir, timeout, do_agent_review, if parameters else "" ) ) - """Record GUI interactions and generate a macro YAML. - - \b - Start recording, perform your GUI workflow, then press Ctrl+Alt+S to stop. - Each click is anchored to the focused window (robust to window movement). - - \b - Examples: - # Basic recording - macro record my_export - - # Record + interactively parameterize typed values - macro record my_export --parameterize - - # Record + auto-parameterize with LLM - macro record my_export --auto-parameterize --api-key $MACROCLI_API_KEY - - Requires: pip install mss Pillow pynput - """ - try: - from cli_anything.macrocli.core.recorder import MacroRecorder - except ImportError as e: - click.echo(f"Error: {e}", err=True) - sys.exit(1) - - if do_parameterize and do_auto_param: - click.echo( - "Error: --parameterize and --auto-parameterize are mutually exclusive.", - err=True, - ) - sys.exit(1) - - recorder = MacroRecorder(macro_name=name, output_dir=output_dir) - - if not _json_output: - click.echo(f"Recording '{name}'. Press Ctrl+Alt+S to stop...") - - try: - recorder.record(timeout_s=timeout if timeout > 0 else None) - except Exception as e: - if _json_output: - output({"error": str(e), "success": False}) - else: - click.echo(f"Error during recording: {e}", err=True) - if not _repl_mode: - sys.exit(1) - return - - # ── Parameterization phase ──────────────────────────────────────────────── - parameters = None - type_steps = recorder.get_type_steps() - - if do_auto_param and type_steps: - try: - from cli_anything.macrocli.core.parameterize import ( - llm_suggest_parameters, - interactive_parameterize, - ) - if not _json_output: - click.echo(f"\nAsking LLM to suggest parameters for " - f"{len(type_steps)} type_text step(s)...") - suggestions = llm_suggest_parameters( - type_steps, api_key=api_key - ) - if suggestions and not _json_output: - click.echo(" LLM suggestions:") - for idx, pname in suggestions.items(): - step = recorder._steps[idx] - click.echo(f" step {idx+1} {step.text!r} → ${{{pname}}}") - click.echo() - # Still allow user to confirm/adjust - confirmed = interactive_parameterize( - [(i, s) for i, s in type_steps if i in suggestions], - existing_params=set(), - ) - # For steps LLM suggested but user skipped, remove them - final = {i: n for i, n in suggestions.items() if i in confirmed} - # For steps user renamed, use their name - final.update(confirmed) - parameters = recorder.apply_parameterization(final) - elif not suggestions and not _json_output: - click.echo(" LLM found no values to parameterize.") - except Exception as e: - click.echo(f" Warning: LLM parameterization failed: {e}", err=True) - click.echo(" Falling back to interactive mode...") - do_parameterize = True - - if do_parameterize and type_steps: - from cli_anything.macrocli.core.parameterize import interactive_parameterize - assignments = interactive_parameterize(type_steps) - if assignments: - parameters = recorder.apply_parameterization(assignments) - - try: - yaml_path = recorder.save(parameters=parameters) - except Exception as e: - if _json_output: - output({"error": str(e), "success": False}) - else: - click.echo(f"Error saving macro: {e}", err=True) - if not _repl_mode: - sys.exit(1) - return - - if _json_output: - output({ - "success": True, - "yaml_path": yaml_path, - "steps": len(recorder._steps), - "parameters": list((parameters or {}).keys()), - "templates_dir": str(recorder.templates_dir), - }) - else: - click.echo(f"✓ Saved {len(recorder._steps)} steps to: {yaml_path}") - if parameters: - click.echo(f" Parameters: {', '.join(parameters.keys())}") - click.echo(f" Templates: {recorder.templates_dir}/") - click.echo( - "\n Run it with:\n" - f" macro run {name} --macro-file {yaml_path}" - + ( - "".join(f" --param {k}=" for k in (parameters or {})) - if parameters else "" - ) - ) @macro.command("parameterize")