`=` was missing from the Pratt table, causing `a = b;` to fail with "expected `;`, found `=`". Assignment is now BinaryOp::Assign with binding power (2, 2) — lowest precedence, right-associative — so `a = b = c` parses as `a = (b = c)`.
454 lines
20 KiB
EBNF
454 lines
20 KiB
EBNF
(* Flux Language Grammar — Context-Free LL(1) Grammar *)
|
|
(* ================================================================ *)
|
|
(* *)
|
|
(* Notation (ISO/IEC 14977 EBNF): *)
|
|
(* rule = definition ; defines a rule (terminated by ;) *)
|
|
(* a , b concatenation *)
|
|
(* a | b alternation *)
|
|
(* { a } zero or more repetitions of a *)
|
|
(* [ a ] optional a (zero or one) *)
|
|
(* ( a | b ) grouping *)
|
|
(* "literal" terminal string *)
|
|
(* *)
|
|
(* UPPERCASE identifiers are lexical token classes whose value *)
|
|
(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT). *)
|
|
(* They are NOT defined here — see SYNTAX.md. *)
|
|
(* *)
|
|
(* Unique/fixed tokens are written as quoted literals directly. *)
|
|
(* *)
|
|
(* Lowercase identifiers are non-terminals (grammar productions). *)
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Program (start symbol) *)
|
|
(* ================================================================ *)
|
|
|
|
(* A program is a possibly empty sequence of top-level definitions. *)
program = { top_level_def } ;
|
|
|
|
(* A top-level definition is either a function or a struct. *)
(* LL(1): "fn" → func_def; "struct" → struct_def. *)
top_level_def = func_def
|
|
| struct_def ;
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Expressions *)
|
|
(* ================================================================ *)
|
|
|
|
(* Entry point of the full expression hierarchy. Assignment is the *)
(* loosest-binding level, so every full expression starts there. *)
expr = assign_expr ;
|
|
|
|
|
|
(* --- Assignment (lowest-precedence binary operator) --- *)
|
|
(* *)
|
|
(* Uses token `=`; right-associative via recursion. *)
|
|
(* The optional tail admits at most one "=" per assign_expr level; *)
|
|
(* chains like `a = b = c` still parse as `a = (b = c)` because the *)
|
|
(* right-hand side recurses into assign_expr. *)
|
|
(* *)
|
|
(* LL(1): after or_expr, peek at next token. *)
|
|
(* "=" → consume and recurse into assign_expr *)
|
|
(* other → return the or_expr as-is *)
|
|
(* "=" is not in FIRST(stmt) or FOLLOW(assign_expr), so the optional *)
|
|
(* tail is LL(1) and expr_stmt can still be distinguished from other statement kinds. *)
|
|
|
|
(* Syntactically the left operand is any or_expr; this rule does *)
(* not restrict it to assignable forms. *)
(* NOTE(review): presumably target validity is checked after *)
(* parsing — confirm against the compiler. *)
assign_expr = or_expr , [ "=" , assign_expr ] ;
|
|
|
|
|
|
(* --- Logical OR (lowest-precedence binary operator after assignment) --- *)
|
|
(* *)
|
|
(* Uses keyword `or`; left-associative via iteration. *)
|
|
|
|
or_expr = and_expr , { "or" , and_expr } ;
|
|
|
|
|
|
(* --- Logical AND --- *)
|
|
(* *)
|
|
(* Uses keyword `and`; left-associative via iteration. *)
|
|
|
|
and_expr = bitor_expr , { "and" , bitor_expr } ;
|
|
|
|
|
|
(* --- Bitwise OR --- *)
|
|
|
|
(* Left-associative via iteration. Binds tighter than `and`, *)
(* looser than "^". *)
bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
|
|
|
|
|
|
(* --- Bitwise XOR --- *)
|
|
|
|
(* Left-associative via iteration. Binds tighter than "|", *)
(* looser than binary "&". *)
bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
|
|
|
|
|
|
(* --- Bitwise AND --- *)
|
|
|
|
(* Left-associative via iteration. Binds tighter than "^", *)
(* looser than "+" and "-". This is the binary "&"; the prefix *)
(* address-of form is an alternative of unary_expr. *)
bitand_expr = additive_expr , { "&" , additive_expr } ;
|
|
|
|
|
|
(* --- Additive: addition and subtraction --- *)
|
|
|
|
(* Left-associative via iteration; "+" and "-" share one precedence *)
(* level. This is the binary "-"; prefix negation is an alternative *)
(* of unary_expr. *)
additive_expr = multiplicative_expr ,
|
|
{ ( "+" | "-" ) , multiplicative_expr } ;
|
|
|
|
|
|
(* --- Multiplicative: multiplication, division, modulo --- *)
|
|
|
|
(* Left-associative via iteration; "*", "/" and "%" share one *)
(* precedence level. This is the binary "*"; prefix dereference is *)
(* an alternative of unary_expr. *)
multiplicative_expr = unary_expr ,
|
|
{ ( "*" | "/" | "%" ) , unary_expr } ;
|
|
|
|
|
|
(* --- Unary operators (prefix, right-associative by recursion) --- *)
|
|
(* *)
|
|
(* "!" logical not *)
|
|
(* "~" bitwise not *)
|
|
(* "-" arithmetic negation *)
|
|
(* "*" dereference (pointer indirection) *)
|
|
(* "&" address-of *)
|
|
|
|
unary_expr = "!" , unary_expr
|
|
| "~" , unary_expr
|
|
| "-" , unary_expr
|
|
| "*" , unary_expr
|
|
| "&" , unary_expr
|
|
| postfix_expr ;
|
|
|
|
|
|
(* --- Postfix operators (left-associative via iteration) --- *)
|
|
(* *)
|
|
(* Postfix operators bind tighter than any prefix or binary form. *)
|
|
(* Multiple postfix operations chain left-to-right. *)
|
|
|
|
postfix_expr = primary_expr , { postfix_op } ;
|
|
|
|
postfix_op = "." , IDENT (* member access *)
|
|
| "[" , expr , "]" (* subscript/index *)
|
|
| "(" , arg_list , ")" ; (* function call *)
|
|
|
|
|
|
(* --- Primary expressions (highest precedence) --- *)
|
|
(* *)
|
|
(* LL(1) note: after IDENT, peek at the next token. *)
|
|
(* "{" → parse struct_lit_body (struct literal) *)
|
|
(* other → bare identifier reference *)
|
|
|
|
primary_expr = IDENT , [ struct_lit_body ] (* ident or struct lit *)
|
|
| INT_LIT
|
|
| FLOAT_LIT
|
|
| STRING_LIT
|
|
| CHAR_LIT
|
|
| "true"
|
|
| "false"
|
|
| "(" , expr , ")" ; (* parenthesised *)
|
|
|
|
|
|
(* --- Struct literal --- *)
|
|
(* *)
|
|
(* A struct literal constructs a value of a named struct type. *)
|
|
(* IDENT "{" field: expr, ... "}" *)
|
|
(* *)
|
|
(* Field order need not match the struct definition order. *)
|
|
(* No trailing comma is permitted (consistent with struct_def). *)
|
|
(* *)
|
|
(* LL(1) notes: *)
|
|
(* struct_field_list: "}" → ε; IDENT → first field *)
|
|
(* FIRST(struct_field) = {IDENT} *)
|
|
(* FOLLOW(struct_field_list) = {"}"} *)
|
|
(* Disjoint, so no look-ahead conflict. *)
|
|
|
|
struct_lit_body = "{" , struct_field_list , "}" ;
|
|
|
|
struct_field_list = [ struct_field , { "," , struct_field } ] ;
|
|
|
|
struct_field = IDENT , ":" , expr ;
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Argument List *)
|
|
(* ================================================================ *)
|
|
|
|
(* Zero or more comma-separated full expressions; the grammar has *)
(* no production for a trailing comma. *)
(* LL(1): arg_list only appears between "(" and ")" in postfix_op, *)
(* so ")" → ε (empty list); otherwise parse the first expr. *)
arg_list = [ expr , { "," , expr } ] ;
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* No-Struct Expression Hierarchy (expr_ns) *)
|
|
(* ================================================================ *)
|
|
(* *)
|
|
(* Struct literals create an LL(1) ambiguity in if/while conditions:*)
|
|
(* if Point { x: 1 } { ... } *)
|
|
(* After "Point", "{" could open a struct literal OR the body block.*)
|
|
(* *)
|
|
(* Solution: define expr_ns — identical to expr except *)
|
|
(* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
|
|
(* Struct literals ARE still allowed when parenthesised: *)
|
|
(* if (Point { x: 1 }).flag { ... } *)
|
|
(* *)
|
|
(* if_stmt and while_stmt use expr_ns for their condition. *)
|
|
(* All other expression positions use the full expr. *)
|
|
|
|
expr_ns = assign_expr_ns ;
|
|
|
|
assign_expr_ns = or_expr_ns , [ "=" , assign_expr_ns ] ;
|
|
|
|
or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;
|
|
and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
|
|
|
|
bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
|
|
bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
|
|
bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
|
|
|
|
additive_expr_ns = multiplicative_expr_ns ,
|
|
{ ( "+" | "-" ) , multiplicative_expr_ns } ;
|
|
|
|
multiplicative_expr_ns = unary_expr_ns ,
|
|
{ ( "*" | "/" | "%" ) , unary_expr_ns } ;
|
|
|
|
unary_expr_ns = "!" , unary_expr_ns
|
|
| "~" , unary_expr_ns
|
|
| "-" , unary_expr_ns
|
|
| "*" , unary_expr_ns
|
|
| "&" , unary_expr_ns
|
|
| postfix_expr_ns ;
|
|
|
|
postfix_expr_ns = primary_expr_ns , { postfix_op } ;
|
|
|
|
(* primary_expr_ns: same as primary_expr but IDENT is never *)
|
|
(* followed by struct_lit_body. Note "(" , expr , ")" uses full *)
|
|
(* expr, so struct literals are permitted inside parentheses. *)
|
|
|
|
primary_expr_ns = IDENT (* bare ident only *)
|
|
| INT_LIT
|
|
| FLOAT_LIT
|
|
| STRING_LIT
|
|
| CHAR_LIT
|
|
| "true"
|
|
| "false"
|
|
| "(" , expr , ")" ; (* struct lit OK here *)
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Types *)
|
|
(* ================================================================ *)
|
|
|
|
(* LL(1): peek at the next token. *)
(* primitive-type keyword → primitive_type *)
(* IDENT → named_type *)
(* "*" → pointer_type *)
(* "[" → array_type *)
type = primitive_type
|
|
| named_type
|
|
| pointer_type
|
|
| array_type ;
|
|
|
|
|
|
(* --- Primitive types --- *)
|
|
(* *)
|
|
(* Unsigned integers : u8 u16 u32 u64 *)
|
|
(* Signed integers : i8 i16 i32 i64 *)
|
|
(* Floating-point : f32 f64 *)
|
|
(* Other : bool char *)
|
|
|
|
primitive_type = "u8" | "u16" | "u32" | "u64"
|
|
| "i8" | "i16" | "i32" | "i64"
|
|
| "f32" | "f64"
|
|
| "bool" | "char" ;
|
|
|
|
|
|
(* --- Named types --- *)
|
|
(* *)
|
|
(* A user-defined type referenced by its identifier (e.g. a struct *)
|
|
(* name). The lexer guarantees that all primitive-type keywords are *)
|
|
(* reserved, so IDENT never clashes with primitive_type. *)
|
|
|
|
named_type = IDENT ;
|
|
|
|
|
|
(* --- Pointer types --- *)
|
|
(* *)
|
|
(* "*" type — typed pointer; the pointee type is known. *)
|
|
(* "*opaque" — untyped/opaque pointer (no pointee type info). *)
|
|
(* *)
|
|
(* LL(1) note: after "*", "opaque" is not in FIRST(type), so the *)
|
|
(* two alternatives are always distinguishable with one token. *)
|
|
|
|
pointer_type = "*" , ( "opaque" | type ) ;
|
|
|
|
|
|
(* --- Array types --- *)
|
|
(* *)
|
|
(* "[" type ";" INT_LIT "]" *)
|
|
(* *)
|
|
(* The element type and the fixed size (a non-negative integer *)
|
|
(* literal) are separated by ";". Sizes that are constant *)
|
|
(* expressions may be introduced in a later grammar revision. *)
|
|
|
|
array_type = "[" , type , ";" , INT_LIT , "]" ;
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Statements *)
|
|
(* ================================================================ *)
|
|
|
|
(* A statement is exactly one of the nine forms below. Every form *)
(* except expr_stmt begins with its own keyword or with "{", so the *)
(* first token selects the alternative; expr_stmt is the fallback. *)
stmt = let_stmt
|
|
| return_stmt
|
|
| if_stmt
|
|
| while_stmt
|
|
| loop_stmt
|
|
| break_stmt
|
|
| continue_stmt
|
|
| block_stmt
|
|
| expr_stmt ;
|
|
|
|
|
|
(* --- Return statement --- *)
|
|
(* *)
|
|
(* Exits the enclosing function, optionally yielding a value. *)
|
|
(* "return ;" is used when the function return type is (). *)
|
|
(* *)
|
|
(* LL(1): after "return", peek at next token. *)
|
|
(* ";" → no expression (unit return) *)
|
|
(* other → parse expr, then expect ";" *)
|
|
(* ";" is not in FIRST(expr), so the two cases are unambiguous. *)
|
|
|
|
return_stmt = "return" , [ expr ] , ";" ;
|
|
|
|
|
|
(* --- Expression statement --- *)
|
|
(* *)
|
|
(* Evaluates an expression for its side effects; the value is *)
|
|
(* discarded. The ";" is mandatory. *)
|
|
(* *)
|
|
(* LL(1): at stmt level: *)
|
|
(* "let" → let_stmt *)
|
|
(* "return" → return_stmt *)
|
|
(* "if" → if_stmt *)
|
|
(* "while" → while_stmt *)
|
|
(* "loop" → loop_stmt *)
|
|
(* "break" → break_stmt *)
|
|
(* "continue" → continue_stmt *)
|
|
(* "{" → block_stmt *)
|
|
(* other → expr_stmt *)
|
|
|
|
expr_stmt = expr , ";" ;
|
|
|
|
|
|
(* --- If statement --- *)
|
|
(* *)
|
|
(* Conditionally executes a block. An optional "else" branch may *)
|
|
(* follow; it is either a plain block or another "if" statement, *)
|
|
(* enabling "else if" chains of arbitrary length. *)
|
|
(* *)
|
|
(* LL(1) notes: *)
|
|
(* condition uses expr_ns — struct literals are forbidden at the *)
|
|
(* outermost level to avoid ambiguity with the body block's "{". *)
|
|
(* [ "else" ... ] — consume "else" iff next token is "else" *)
|
|
(* else_branch: "if" → if_stmt (else-if); "{" → block_stmt *)
|
|
(* The two else_branch alternatives start with distinct tokens, *)
|
|
(* so no look-ahead conflict arises (no dangling-else ambiguity). *)
|
|
|
|
if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
|
|
|
|
else_branch = if_stmt (* else if *)
|
|
| block_stmt ; (* plain else *)
|
|
|
|
|
|
(* --- While loop --- *)
|
|
(* *)
|
|
(* Repeatedly executes the body as long as the condition is true. *)
|
|
(* The condition is re-evaluated before every iteration. *)
|
|
(* If the condition is false on the first check, the body never *)
|
|
(* executes. *)
|
|
(* *)
|
|
(* Like if_stmt, the condition uses expr_ns to prevent struct *)
|
|
(* literal ambiguity with the body block's opening "{". *)
|
|
|
|
while_stmt = "while" , expr_ns , block_stmt ;
|
|
|
|
|
|
(* --- Infinite loop --- *)
|
|
(* *)
|
|
(* Executes the body unconditionally and indefinitely. The only *)
|
|
(* ways to exit are "break" or "return" inside the body. *)
|
|
|
|
loop_stmt = "loop" , block_stmt ;
|
|
|
|
|
|
(* --- Break and continue --- *)
|
|
(* *)
|
|
(* "break" exits the immediately enclosing "while" or "loop". *)
|
|
(* "continue" skips the rest of the current iteration and jumps to *)
|
|
(* the next condition check (while) or iteration (loop). *)
|
|
(* Both are only valid inside a loop body; the compiler enforces *)
|
|
(* this as a semantic rule. *)
|
|
|
|
break_stmt = "break" , ";" ;
|
|
continue_stmt = "continue" , ";" ;
|
|
|
|
|
|
(* --- Block statement --- *)
|
|
(* *)
|
|
(* A block groups zero or more statements into a single statement *)
|
|
(* and introduces a new lexical scope. It does not produce a value. *)
|
|
(* *)
|
|
(* LL(1): at stmt level, "{" unambiguously selects block since no *)
|
|
(* other stmt alternative starts with "{". *)
|
|
|
|
block_stmt = "{" , { stmt } , "}" ;
|
|
|
|
|
|
(* --- Let statement --- *)
|
|
(* *)
|
|
(* Introduces a named binding in the current scope. *)
|
|
(* Bindings are immutable by default; "mut" opts into mutability. *)
|
|
(* *)
|
|
(* The type annotation and the initialiser are both optional, but *)
|
|
(* at least one must be present for the binding to be usable; *)
|
|
(* the compiler enforces this as a semantic (not syntactic) rule. *)
|
|
(* *)
|
|
(* LL(1) notes: *)
|
|
(* [ "mut" ] — consume "mut" iff the next token is "mut" *)
|
|
(* [ ":" ... ] — consume iff next token is ":" *)
|
|
(* [ "=" ... ] — consume iff next token is "=" *)
|
|
(* All decision tokens are distinct, so no look-ahead conflict. *)
|
|
|
|
let_stmt = "let" , [ "mut" ] , IDENT ,
|
|
[ ":" , type ] ,
|
|
[ "=" , expr ] ,
|
|
";" ;
|
|
|
|
|
|
(* ================================================================ *)
|
|
(* Top-Level Definitions *)
|
|
(* ================================================================ *)
|
|
|
|
(* --- Function definition --- *)
|
|
(* *)
|
|
(* Defines a named function with a typed parameter list and an *)
|
|
(* optional return type. Omitting "->" implies a return type of (). *)
|
|
(* *)
|
|
(* LL(1) notes: *)
|
|
(* param_list: ")" → ε (empty list); else parse first param *)
|
|
(* param: "mut" → consume; IDENT → skip (mut absent) *)
|
|
(* [ "->" ... ]: consume iff next token is "->" *)
|
|
(* "->" is a single two-character token, distinct from "{" (the *)
|
|
(* token that begins block_stmt), so there is no look-ahead conflict with the block_stmt that follows. *)
|
|
|
|
func_def = "fn" , IDENT , "(" , param_list , ")" ,
|
|
[ "->" , type ] ,
|
|
block_stmt ;
|
|
|
|
param_list = [ param , { "," , param } ] ;
|
|
|
|
(* Each parameter is an optionally-mutable name with a required *)
|
|
(* type annotation. Mutability applies within the function body. *)
|
|
|
|
param = [ "mut" ] , IDENT , ":" , type ;
|
|
|
|
|
|
(* --- Struct definition --- *)
|
|
(* *)
|
|
(* Defines a named product type with zero or more typed fields. *)
|
|
(* Fields are separated by commas; no trailing comma is permitted. *)
|
|
(* *)
|
|
(* LL(1) notes: *)
|
|
(* field_list: "}" → ε (empty struct); else parse first field *)
|
|
(* FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"} *)
|
|
(* Disjoint, so no look-ahead conflict. *)
|
|
(* top_level_def: "fn" → func_def; "struct" → struct_def *)
|
|
|
|
struct_def = "struct" , IDENT , "{" , field_list , "}" ;
|
|
|
|
field_list = [ field , { "," , field } ] ;
|
|
|
|
field = IDENT , ":" , type ;
|