fix(shader-compiler): preserve args for builtin-alias and user-fn-alias macro calls (#3001)

* fix(shader-compiler): enforce shift for known LALR conflicts
This commit is contained in:
zhuxudong
2026-05-14 11:51:07 +08:00
committed by GitHub
parent bba09be10e
commit 66ea595d0a
8 changed files with 324 additions and 78 deletions

View File

@@ -30,37 +30,50 @@ export class ParserUtils {
}
/**
* Return the lexeme of `expr` when it is (transitively) a single bare identifier
* wrapped in primary/postfix expression nodes, e.g. `o` → "o". Returns null for
* compound expressions (`o.x`, `foo(..)`, `arr[0]`, …). Useful for callers that
* want to apply a substitution rule only at the root of an expression, never on
* swizzles or nested member access.
* Walk single-child precedence-chain wrappers down to a `VariableIdentifier`,
* returning the leaf node (or `undefined` for any compound expression).
*
* `allowParens` chooses between two callers' needs:
* - `true` — descend through `( expression )` form, so `(v)` resolves to
* `v`. Use when the caller substitutes at the expression root
* (aliasing, renaming); user-written parens carry no extra meaning.
* - `false` — any 3-child `PrimaryExpression` aborts; `(v)` stays
* compound. Use when the caller treats the unwrapped node as a single
* token (IO-struct arg drop, alias detection); user-written parens
* must not collapse.
*/
static unwrapBareIdentifier(
node: TreeNode,
options: { allowParens: boolean }
): ASTNode.VariableIdentifier | undefined {
let cur: TreeNode = node;
while (true) {
if (cur instanceof ASTNode.VariableIdentifier) return cur;
if (options.allowParens && cur instanceof ASTNode.PrimaryExpression && cur.children.length === 3) {
const inner = cur.children[1];
if (!(inner instanceof TreeNode)) return undefined;
cur = inner;
continue;
}
if (cur instanceof ASTNode.ExpressionAstNode && cur.children.length === 1) {
const child = cur.children[0];
if (!(child instanceof TreeNode)) return undefined;
cur = child;
continue;
}
return undefined;
}
}
/**
* Lexeme variant of `unwrapBareIdentifier({ allowParens: true })` for callers
* that already work with strings. Returns `null` for compound expressions.
*/
static extractDirectIdentLexeme(expr: TreeNode): string | null {
let cur: TreeNode | Token = expr;
while (cur) {
if (cur instanceof ASTNode.VariableIdentifier) {
const child = cur.children[0];
return child instanceof Token ? child.lexeme : null;
}
// `( expression )` form on PrimaryExpression — descend through the
// wrapped Expression so `(v)` resolves to `v`. All other compound
// forms (member access, function call, comma, …) bail out.
if (cur instanceof ASTNode.PrimaryExpression && cur.children.length === 3) {
cur = cur.children[1];
continue;
}
// Any precedence-chain wrapper with a single child forwards through.
// Covers PostfixExpression / PrimaryExpression(len=1) / Assignment /
// Conditional / LogicalOr / … all the way down — including the verbose
// build's full chain and the release build's elision-flattened chain.
if (cur instanceof ASTNode.ExpressionAstNode && cur.children.length === 1) {
cur = cur.children[0];
continue;
}
return null;
}
return null;
const ident = ParserUtils.unwrapBareIdentifier(expr, { allowParens: true });
if (!ident) return null;
const child = ident.children[0];
return child instanceof Token ? child.lexeme : null;
}
// #if _VERBOSE

View File

@@ -27,11 +27,14 @@ export class ShaderCompilerUtils {
source: string,
location: ShaderRange | ShaderPosition,
file?: string
): Error | undefined {
): Error {
// #if _VERBOSE
return new GSError(errorName, message, location, source, file);
// #else
console.error(message);
const err = new Error(message);
err.name = errorName;
return err;
// #endif
}
}

View File

@@ -147,39 +147,26 @@ export abstract class CodeGenVisitor {
const astNodes = paramList.paramNodes;
const context = VisitorContext.context;
// `MacroCallFunction` covers two call shapes sharing the same AST:
// (a) object-like macro whose value is a function name, used as a call —
// `#define FN foo` + `FN(varyings, …)`. The driver expands `FN` to
// `foo`, and `foo` is a shader function whose IO-struct params
// have been flattened. The call site must drop IO-struct args to
// match the flattened signature — same rule as `visitFunctionCall`.
// (b) true function-like macro — `#define MAX3(a,b,c) …` + `MAX3(v.x, …)`.
// The shader compiler doesn't expand the macro; the driver does, and the
// `#define` fixes the parameter count. Args must be preserved
// verbatim — a member-access arg like `v.v_uv` unwraps to a root
// identifier whose type is an IO struct, but dropping the arg
// would change the macro's arity.
//
// `isFunctionLikeMacro` is set by `MacroCallSymbol.semanticAnalyze` from
// `macroDefineList[name][*].isFunction` and carries the definition shape.
const params = node.isFunctionLikeMacro
? astNodes
: astNodes.filter((arg) => {
if (arg instanceof ASTNode.AssignmentExpression) {
const variableParam = ParserUtils.unwrapNodeByType<ASTNode.VariableIdentifier>(
arg,
NoneTerminal.variable_identifier
);
if (
variableParam &&
typeof variableParam.typeInfo === "string" &&
context.getStructRole(variableParam.typeInfo)
) {
return false;
}
// Drop bare IO-struct args only when the macro aliases a user fn (whose
// formal was flattened). All other shapes preserve args verbatim.
let params: typeof astNodes;
if (node.isFunctionLikeMacro || !node.aliasesNonBuiltinIdent) {
params = astNodes;
} else {
params = astNodes.filter((arg) => {
if (arg instanceof ASTNode.AssignmentExpression) {
const variableParam = ParserUtils.unwrapBareIdentifier(arg, { allowParens: false });
if (
variableParam &&
typeof variableParam.typeInfo === "string" &&
context.getStructRole(variableParam.typeInfo)
) {
return false;
}
return true;
});
}
return true;
});
}
let paramsCode = "";
for (let i = 0, length = params.length; i < length; i++) {

View File

@@ -2,6 +2,7 @@ import { ETokenType } from "../common";
import { Keyword } from "../common/enums/Keyword";
import { Grammar } from "../parser/Grammar";
import { GrammarSymbol, NoneTerminal, Terminal } from "../parser/GrammarSymbol";
import Production from "./Production";
import State from "./State";
import StateItem from "./StateItem";
import { default as GrammarUtils, default as Utils } from "./Utils";
@@ -157,9 +158,14 @@ export class LALR1 {
private _addAction(table: ActionTable, terminal: Terminal, action: ActionInfo) {
const exist = table.get(terminal);
if (exist && !Utils.isActionEqual(exist, action)) {
// Resolve dangling else ambiguity
if (terminal === Keyword.ELSE && exist.action === EAction.Shift && action.action === EAction.Reduce) {
return;
// Known shift-preferred conflicts (see TargetParser.y `%expect 2`).
// Enforce shift regardless of the order `_inferNextState` registers
// actions: when `exist` is already Shift and `action` is Reduce, keep
// `exist` (early-return); the reverse order (`exist` Reduce, `action`
// Shift) falls through to `table.set` below and Shift overwrites
// Reduce. Order-independent by construction.
if (LALR1._isKnownShiftPreferred(terminal, exist, action)) {
if (exist.action === EAction.Shift && action.action === EAction.Reduce) return;
} else {
// #if _VERBOSE
console.warn(
@@ -174,6 +180,28 @@ export class LALR1 {
table.set(terminal, action);
}
// Catalog of expected shift/reduce conflicts. Each entry must correspond to
// one of TargetParser.y's `%expect`-ed conflicts; any new conflict not in
// this list falls through to the verbose `conflict detect` warning so the
// grammar/runtime drift is loud rather than silent.
// - ELSE: dangling-else, bind to nearest `if`
// - '(' + `type_specifier_nonarray → macro_call_symbol`: macro-as-type-alias
// (#2974), prefer the expression-position `macro_call_function` over the
// type-position reduce
private static _isKnownShiftPreferred(terminal: Terminal, exist: ActionInfo, action: ActionInfo): boolean {
if (terminal === Keyword.ELSE) return true;
if (terminal !== ETokenType.LEFT_PAREN) return false;
const reduce = exist.action === EAction.Reduce ? exist : action.action === EAction.Reduce ? action : null;
if (!reduce) return false;
const prod = Production.pool.get(reduce.target!);
return (
!!prod &&
prod.goal === NoneTerminal.type_specifier_nonarray &&
prod.derivation.length === 1 &&
prod.derivation[0] === NoneTerminal.macro_call_symbol
);
}
// https://people.cs.pitt.edu/~jmisurda/teaching/cs1622/handouts/cs1622-first_and_follow.pdf
private computeFirstSet() {
for (const production of this.grammar.productions.slice(1)) {

View File

@@ -1661,15 +1661,16 @@ export namespace ASTNode {
* a root of `referenceSymbolNames`. Mixed forms across branches → false,
* fall back to legacy inference. */
hasAstValue: boolean = false;
/** True when the macro is defined as function-like (`#define NAME(params) …`).
* Used by `MacroCallFunction` codegen to pick between the two call shapes
* — object-like-macro-as-function-name vs true function-like macro. */
/** `#define NAME(params) …` form — drives function-like vs object-like codegen. */
isFunctionLikeMacro: boolean = false;
/** Every visible replacement is a non-builtin identifier — assume user fn alias. */
aliasesNonBuiltinIdent: boolean = false;
override init(): void {
this.referenceSymbolNames.length = 0;
this.hasAstValue = false;
this.isFunctionLikeMacro = false;
this.aliasesNonBuiltinIdent = false;
}
override semanticAnalyze(sa: SemanticAnalyzer): void {
@@ -1689,6 +1690,7 @@ export namespace ASTNode {
let visibleCount = 0;
let allAst = true;
let isFn = false;
let allAliasNonBuiltinIdent = true;
if (defList) {
for (let i = 0, n = defList.length; i < n; i++) {
const info = defList[i];
@@ -1702,6 +1704,22 @@ export namespace ASTNode {
if (info.valueAst) {
MacroCallSymbol._collectIdentifierRefs(info.valueAst, info.params, refs);
}
// aliasesNonBuiltinIdent: macro replacement is a single non-builtin
// identifier — best-effort proxy for "this macro call site aliases a
// user fn", since uniform/const aliases would surface as a GLSL
// compile error later anyway. Keyword replacements (`vec3`, `mat4`)
// reach here with `valueAst === undefined` (opaque path), so they
// automatically fail without an explicit keyword guard.
if (info.isFunction || !info.valueAst) {
allAliasNonBuiltinIdent = false;
} else {
const leadingIdent = ParserUtils.unwrapBareIdentifier(info.valueAst, { allowParens: false });
const leadingChild = leadingIdent?.children[0];
const leadingId = leadingChild instanceof BaseToken ? leadingChild.lexeme : undefined;
if (!leadingId || BuiltinFunction.isExist(leadingId)) {
allAliasNonBuiltinIdent = false;
}
}
}
}
// Require *every* visible entry to be AST-form before taking the AST
@@ -1710,6 +1728,7 @@ export namespace ASTNode {
// instead of polluting the call site with TypeAny.
this.hasAstValue = visibleCount > 0 && allAst;
this.isFunctionLikeMacro = isFn;
this.aliasesNonBuiltinIdent = visibleCount > 0 && allAliasNonBuiltinIdent;
}
/** Push every leaf `VariableIdentifier`'s lexeme into `out`, skipping
@@ -1739,12 +1758,14 @@ export namespace ASTNode {
macroName: string = "";
hasAstValue: boolean = false;
isFunctionLikeMacro: boolean = false;
aliasesNonBuiltinIdent: boolean = false;
override init(): void {
this.referenceSymbolNames = [];
this.macroName = "";
this.hasAstValue = false;
this.isFunctionLikeMacro = false;
this.aliasesNonBuiltinIdent = false;
}
override semanticAnalyze(sa: SemanticAnalyzer): void {
@@ -1754,6 +1775,7 @@ export namespace ASTNode {
this.macroName = child.macroName;
this.hasAstValue = child.hasAstValue;
this.isFunctionLikeMacro = child.isFunctionLikeMacro;
this.aliasesNonBuiltinIdent = child.aliasesNonBuiltinIdent;
}
override codeGen(visitor: CodeGenVisitor) {

View File

@@ -1,4 +1,28 @@
// For cfg conflict test, used by bison
// Bison-format mirror of the runtime grammar in `lalr/CFG.ts`.
//
// This file is consumed *only* by `bison -r all TargetParser.y` to produce a
// human-readable LALR(1) state report — it does NOT generate runtime parser
// code. The runtime grammar lives in `lalr/CFG.ts` and is the source of truth.
//
// Keep this file in sync with `CFG.ts` so the bison conflict report reflects
// the actual state machine the runtime LALR1 builder produces. Naming
// differences are tolerated (e.g. `eq` here ↔ `EQ_OP` in CFG.ts), but the
// *set* of productions per non-terminal and the symbol *structure* of each
// alternative must match.
// Two expected shift/reduce conflicts — bison errors if the count diverges,
// catching new unintended ambiguity at grammar-change time:
// 1. `ELSE` in `selection_statement` — classic dangling-else, shift binds
// the `else` to the nearest `if`. Standard C/GLSL resolution.
// 2. `(` in `type_specifier_nonarray → macro_call_symbol` (since #2974) —
// at a `macro_call_symbol .` item with `(` lookahead, shift forms a
// `macro_call_function` (expression-position macro call) rather than
// reducing to `type_specifier_nonarray`. Matches the AST node identity
// design intent of #2974.
// Both are handled deterministically by `LALR1._isKnownShiftPreferred`
// (lalr/LALR1.ts) — runtime resolution is order-independent, not reliant on
// bison's implicit shift-wins default.
%expect 2
%token id
%token INT_CONSTANT
@@ -8,11 +32,48 @@
%token void
%token float
%token bool
%token int
%token uint
%token vec2
%token vec3
%token vec4
%token bvec2
%token bvec3
%token bvec4
%token ivec2
%token ivec3
%token ivec4
%token uvec2
%token uvec3
%token uvec4
%token mat2
%token mat3
%token mat4
%token mat2x3
%token mat2x4
%token mat3x2
%token mat3x4
%token mat4x2
%token mat4x3
%token sampler2D
%token sampler3D
%token samplerCube
%token sampler2DShadow
%token samplerCubeShadow
%token sampler2DArray
%token sampler2DArrayShadow
%token isampler2D
%token isampler3D
%token isamplerCube
%token isampler2DArray
%token usampler2D
%token usampler3D
%token usamplerCube
%token usampler2DArray
%token struct
%token highp
%token mediemp
%token mediump
%token lowp
%token const
@@ -168,8 +229,45 @@ precision_specifier:
ext_builtin_type_specifier_nonarray:
void
| float
| int
| bool
| int
| uint
| vec2
| vec3
| vec4
| bvec2
| bvec3
| bvec4
| ivec2
| ivec3
| ivec4
| uvec2
| uvec3
| uvec4
| mat2
| mat3
| mat4
| mat2x3
| mat2x4
| mat3x2
| mat3x4
| mat4x2
| mat4x3
| sampler2D
| sampler3D
| samplerCube
| sampler2DShadow
| samplerCubeShadow
| sampler2DArray
| sampler2DArrayShadow
| isampler2D
| isampler3D
| isamplerCube
| isampler2DArray
| usampler2D
| usampler3D
| usamplerCube
| usampler2DArray
;
type_specifier_nonarray:
@@ -185,8 +283,8 @@ type_specifier_nonarray:
;
struct_specifier:
struct id '{' struct_declaration_list '}' ;
| struct '{' struct_declaration_list '}' ;
struct id '{' struct_declaration_list '}' ';'
| struct '{' struct_declaration_list '}' ';'
;
struct_declaration_list:
@@ -216,7 +314,7 @@ macro_struct_branch:
layout_qualifier:
layout '(' location '=' INT_CONSTANT ')'
| layout '(' location '=' id ')'
;
struct_declarator_list:
@@ -241,7 +339,7 @@ type_specifier:
precision_qualifier:
highp
| mediemp
| mediump
| lowp
;
@@ -421,7 +519,11 @@ function_call:
function_call_generic:
function_identifier '(' function_call_parameter_list ')'
| function_identifier '(' ')'
| function_identifier '(' void ')'
// Mirrors CFG.ts:681 verbatim. This alt is unreachable from any legal
// GLSL token stream (lexer never produces `f VOID )` without `(` between)
// — present since the LALR refactor (#2113, 2024-07). Tracked as latent
// bug; fix belongs in a separate PR with a `f(void)` regression test.
| function_identifier void ')'
;
function_call_parameter_list:
@@ -555,9 +657,9 @@ simple_statement:
declaration:
function_prototype ';'
| init_declarator_list ';'
| PRECISION precision_qualifier ext_builtin_type_specifier_nonarray ';'
| type_qualifier id ';'
| type_qualifier id identifier_list ';'
| precision_specifier
;
identifier_list:

View File

@@ -9,7 +9,7 @@ import { ShaderMacroProcessor } from "@galacean/engine-core/src/shader/ShaderMac
import { Logger, WebGLEngine } from "@galacean/engine";
import { server } from "@vitest/browser/context";
import { describe, expect, it } from "vitest";
import { beforeAll, describe, expect, it } from "vitest";
const { readFile } = server.commands;
Logger.enable();
@@ -1185,6 +1185,51 @@ describe("ShaderCompiler Precompile", async () => {
});
});
// After LALR enforces shift on `macro_call_symbol . '('`, the visitor at
// `K(args)` must distinguish three callee kinds derived from the macro's
// replacement and apply IO-struct formal flattening only when the callee is
// a user-declared function. Earlier the filter ran for every object-like
// macro and silently dropped member-access / literal args (e.g. emitting
// `vec3()` from `MyVec3(attr.POSITION.x, attr.POSITION.y, attr.POSITION.z)`).
describe("MacroCallFunction calleeKind-aware arg filtering", () => {
let evaluated: string;
beforeAll(async () => {
const source = await readFile("./shaders/macro-as-type-args.shader");
const precompiled = shaderCompiler._precompile(source, ShaderLanguage.GLSLES100);
const vi = precompiled.subShaders[0].passes[0].vertexShaderInstructions ?? [];
evaluated = ShaderMacroProcessor.evaluate(vi as any, new Map());
});
// (1) builtin type alias: constructor call, no formal flattening
it("builtin type alias keeps member-access args", () => {
// After flatten, `attr.POSITION.x/y/z` → `POSITION.x/y/z`.
expect(evaluated).toMatch(/vec3\s*\(\s*POSITION\.x\s*,\s*POSITION\.y\s*,\s*POSITION\.z\s*\)/);
});
it("builtin type alias keeps literal args", () => {
expect(evaluated).toMatch(/vec3\s*\(\s*0\.1\s*,\s*0\.2\s*,\s*0\.3\s*\)/);
});
it("builtin type alias never emits an empty constructor call", () => {
// Defends against the original bug where every arg was filtered.
expect(evaluated).not.toMatch(/vec3\s*\(\s*\)/);
});
// (2) builtin function alias: args are values, no flattening
it("builtin function alias keeps all args verbatim", () => {
expect(evaluated).toMatch(/mix\s*\(\s*v_member\s*,\s*v_literal\s*,\s*0\.5\s*\)/);
});
// (3) user-fn alias: formal flattening DOES apply — bare IO-struct arg
// must be dropped so the call site matches the flattened signature.
it("user-fn alias drops bare IO-struct arg paired with flattened formal", () => {
// `computeColor(Attributes a)` flattens to `computeColor()` and call
// becomes `computeColor()` (the `attr` arg drops).
expect(evaluated).toMatch(/computeColor\s*\(\s*\)/);
// The macro name `MyHelper` should still appear in the macro define
// table (the directive itself), but never as `MyHelper(attr)` text.
expect(evaluated).not.toMatch(/MyHelper\s*\(\s*attr\s*\)/);
});
});
// ─────────────────────────────────────────────────────────
// 6. Shader._createFromPrecompiled()
// ─────────────────────────────────────────────────────────

View File

@@ -0,0 +1,46 @@
Shader "macro-callee-kinds" {
SubShader "Default" {
Pass "test" {
mat4 renderer_MVPMat;
// Object-like macros covering the three callee kinds the visitor must
// distinguish at a `K(args)` call site (LALR enforces shift to
// `macro_call_function`, so this exercises the resulting visitor):
// (1) builtin type alias — constructor call, no formal flattening
#define MyVec3 vec3
// (2) builtin function alias — args are values, no flattening
#define MyMix mix
// (3) user-declared function alias — formals flattened, bare IO-struct
// args must be dropped to match flattened signature
#define MyHelper computeColor
struct Attributes { vec3 POSITION; };
// helper used via macro alias `MyHelper`; its only formal is an IO struct,
// so it gets flattened away and its body reads `POSITION.xyz` directly.
vec3 computeColor(Attributes a) {
return a.POSITION.xyz;
}
void vert(Attributes attr) {
// (1) builtin type via member access — args must be preserved
vec3 v_member = MyVec3(attr.POSITION.x, attr.POSITION.y, attr.POSITION.z);
// (1) builtin type via literals — args must be preserved
vec3 v_literal = MyVec3(0.1, 0.2, 0.3);
// (2) builtin fn alias — args must be preserved verbatim
vec3 v_mix = MyMix(v_member, v_literal, 0.5);
// (3) user-fn alias with bare IO struct — formal-paired, MUST be
// dropped so codegen emits `MyHelper()` matching flattened decl
vec3 v_user = MyHelper(attr);
gl_Position = renderer_MVPMat * vec4(v_member + v_literal + v_mix + v_user, 1.0);
}
void frag() { gl_FragColor = vec4(1.0); }
VertexShader = vert;
FragmentShader = frag;
}
}
}