(* Flux Language Grammar — Context-Free LL(1) Grammar *)
(* ================================================================ *)
(* *)
(* Notation (ISO/IEC 14977 EBNF): *)
(*   rule = definition ;   defines a rule (terminated by ;) *)
(*   a , b                 concatenation *)
(*   a | b                 alternation *)
(*   { a }                 zero or more repetitions of a *)
(*   [ a ]                 optional a (zero or one) *)
(*   ( a | b )             grouping *)
(*   "literal"             terminal string *)
(* *)
(* UPPERCASE identifiers are lexical token classes whose value *)
(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT). *)
(* They are NOT defined here — see SYNTAX.md. *)
(* *)
(* Unique/fixed tokens are written as quoted literals directly. *)
(* *)
(* Lowercase identifiers are non-terminals (grammar productions). *)

(* ================================================================ *)
(* Program (start symbol) *)
(* ================================================================ *)

program = { top_level_def } ;

top_level_def = func_def
              | struct_def ;

(* ================================================================ *)
(* Expressions *)
(* ================================================================ *)
(* *)
(* Precedence, lowest to highest: *)
(*   assignment, or, and, |, ^, &, additive, multiplicative, *)
(*   unary (prefix), postfix, primary. *)
(* *)
(* NOTE(review): no equality, relational or shift operators are *)
(* defined at any level of this hierarchy — confirm that this is *)
(* intentional for the current grammar revision. *)

expr = assign_expr ;

(* --- Assignment (lowest-precedence binary operator) --- *)
(* *)
(* Uses token `=`; right-associative via recursion. *)
(* Each assign_expr holds at most one "=" of its own; a longer *)
(* chain nests inside the right operand, so `a = b = c` parses as *)
(* `a = (b = c)`. *)
(* *)
(* LL(1): after or_expr, peek at next token. *)
(*   "="   → consume and recurse into assign_expr *)
(*   other → return the or_expr as-is *)
(* "=" can never follow a complete expression: *)
(*   FOLLOW(assign_expr)    = { ";" "," ")" "]" "}" } *)
(*   FOLLOW(assign_expr_ns) = { "{" } *)
(* so one token of look-ahead decides the optional part. "=" also *)
(* starts no statement, so expr_stmt stays distinguishable from the *)
(* other statement kinds. *)

assign_expr = or_expr , [ "=" , assign_expr ] ;

(* --- Logical OR (binds tighter than assignment only) --- *)
(* *)
(* Uses keyword `or`; left-associative via iteration. *)

or_expr = and_expr , { "or" , and_expr } ;

(* --- Logical AND --- *)
(* *)
(* Uses keyword `and`; left-associative via iteration. *)

and_expr = bitor_expr , { "and" , bitor_expr } ;

(* --- Bitwise OR --- *)

bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;

(* --- Bitwise XOR --- *)

bitxor_expr = bitand_expr , { "^" , bitand_expr } ;

(* --- Bitwise AND --- *)

bitand_expr = additive_expr , { "&" , additive_expr } ;

(* --- Additive: addition and subtraction --- *)

additive_expr = multiplicative_expr ,
                { ( "+" | "-" ) , multiplicative_expr } ;

(* --- Multiplicative: multiplication, division, modulo --- *)

multiplicative_expr = unary_expr ,
                      { ( "*" | "/" | "%" ) , unary_expr } ;

(* --- Unary operators (prefix, right-associative by recursion) --- *)
(* *)
(*   "!"   logical not *)
(*   "~"   bitwise not *)
(*   "-"   arithmetic negation *)
(*   "*"   dereference (pointer indirection) *)
(*   "&"   address-of *)
(* *)
(* unary_op is shared by unary_expr and unary_expr_ns so the two *)
(* hierarchies cannot drift apart. *)
(* LL(1): FIRST(unary_op) is disjoint from FIRST(postfix_expr). *)

unary_op = "!" | "~" | "-" | "*" | "&" ;

unary_expr = unary_op , unary_expr
           | postfix_expr ;

(* --- Postfix operators (left-associative via iteration) --- *)
(* *)
(* Postfix operators bind tighter than any prefix or binary form. *)
(* Multiple postfix operations chain left-to-right. *)

postfix_expr = primary_expr , { postfix_op } ;

postfix_op = "." , IDENT              (* member access *)
           | "[" , expr , "]"         (* subscript/index *)
           | "(" , arg_list , ")" ;   (* function call *)

(* --- Primary expressions (highest precedence) --- *)
(* *)
(* LL(1) note: after IDENT, peek at the next token. *)
(*   "{"   → parse struct_lit_body (struct literal) *)
(*   other → bare identifier reference *)

primary_expr = IDENT , [ struct_lit_body ]   (* ident or struct lit *)
             | literal
             | "(" , expr , ")" ;            (* parenthesised *)

(* Literal values; shared by primary_expr and primary_expr_ns. *)

literal = INT_LIT
        | FLOAT_LIT
        | STRING_LIT
        | CHAR_LIT
        | "true"
        | "false" ;

(* --- Struct literal --- *)
(* *)
(* A struct literal constructs a value of a named struct type. *)
(*   IDENT "{" field: expr, ... "}" *)
(* *)
(* Field order need not match the struct definition order. *)
(* No trailing comma is permitted (consistent with struct_def). *)
(* *)
(* LL(1) notes: *)
(*   struct_field_list: "}" → ε; IDENT → first field *)
(*   FIRST(struct_field) = {IDENT} *)
(*   FOLLOW(struct_field_list) = {"}"} *)
(*   Disjoint, so no look-ahead conflict. *)

struct_lit_body = "{" , struct_field_list , "}" ;

struct_field_list = [ struct_field , { "," , struct_field } ] ;

struct_field = IDENT , ":" , expr ;

(* ================================================================ *)
(* Argument List *)
(* ================================================================ *)

arg_list = [ expr , { "," , expr } ] ;

(* ================================================================ *)
(* No-Struct Expression Hierarchy (expr_ns) *)
(* ================================================================ *)
(* *)
(* Struct literals create an LL(1) ambiguity in if/while conditions: *)
(*   if Point { x: 1 } { ... } *)
(* After "Point", "{" could open a struct literal OR the body block. *)
(* *)
(* Solution: define expr_ns — identical to expr except *)
(* primary_expr_ns disallows the struct_lit_body suffix after IDENT. *)
(* Struct literals ARE still allowed when parenthesised: *)
(*   if (Point { x: 1 }).flag { ... } *)
(* *)
(* if_stmt and while_stmt use expr_ns for their condition. *)
(* All other expression positions use the full expr. *)
(* *)
(* unary_op, postfix_op and literal are shared with the full *)
(* hierarchy; only the rules below are duplicated. *)

expr_ns = assign_expr_ns ;

assign_expr_ns = or_expr_ns , [ "=" , assign_expr_ns ] ;

or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;

and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;

bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;

bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;

bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;

additive_expr_ns = multiplicative_expr_ns ,
                   { ( "+" | "-" ) , multiplicative_expr_ns } ;

multiplicative_expr_ns = unary_expr_ns ,
                         { ( "*" | "/" | "%" ) , unary_expr_ns } ;

unary_expr_ns = unary_op , unary_expr_ns
              | postfix_expr_ns ;

postfix_expr_ns = primary_expr_ns , { postfix_op } ;

(* primary_expr_ns: same as primary_expr but IDENT is never *)
(* followed by struct_lit_body. Note "(" , expr , ")" uses full *)
(* expr, so struct literals are permitted inside parentheses. *)

primary_expr_ns = IDENT                 (* bare ident only *)
                | literal
                | "(" , expr , ")" ;    (* struct lit OK here *)

(* ================================================================ *)
(* Types *)
(* ================================================================ *)

type = primitive_type
     | named_type
     | pointer_type
     | array_type ;

(* --- Primitive types --- *)
(* *)
(*   Unsigned integers : u8 u16 u32 u64 *)
(*   Signed integers   : i8 i16 i32 i64 *)
(*   Floating-point    : f32 f64 *)
(*   Other             : bool char *)

primitive_type = "u8" | "u16" | "u32" | "u64"
               | "i8" | "i16" | "i32" | "i64"
               | "f32" | "f64"
               | "bool" | "char" ;

(* --- Named types --- *)
(* *)
(* A user-defined type referenced by its identifier (e.g. a struct *)
(* name). The lexer guarantees that all primitive-type keywords are *)
(* reserved, so IDENT never clashes with primitive_type. *)

named_type = IDENT ;

(* --- Pointer types --- *)
(* *)
(*   "*" type   — typed pointer; the pointee type is known. *)
(*   "*opaque"  — untyped/opaque pointer (no pointee type info). *)
(* *)
(* LL(1) note: after "*", "opaque" is not in FIRST(type), so the *)
(* two alternatives are always distinguishable with one token. *)
(* NOTE(review): this relies on the lexer reserving "opaque" so that *)
(* it is never an IDENT (named_type) — confirm against SYNTAX.md. *)

pointer_type = "*" , ( "opaque" | type ) ;

(* --- Array types --- *)
(* *)
(*   "[" type ";" INT_LIT "]" *)
(* *)
(* The element type and the fixed size (a non-negative integer *)
(* literal) are separated by ";". Sizes that are constant *)
(* expressions may be introduced in a later grammar revision. *)

array_type = "[" , type , ";" , INT_LIT , "]" ;

(* ================================================================ *)
(* Statements *)
(* ================================================================ *)

stmt = let_stmt
     | return_stmt
     | if_stmt
     | while_stmt
     | loop_stmt
     | break_stmt
     | continue_stmt
     | block_stmt
     | expr_stmt ;

(* --- Return statement --- *)
(* *)
(* Exits the enclosing function, optionally yielding a value. *)
(* "return ;" is used when the function return type is (). *)
(* *)
(* LL(1): after "return", peek at next token. *)
(*   ";"   → no expression (unit return) *)
(*   other → parse expr, then expect ";" *)
(* ";" is not in FIRST(expr), so the two cases are unambiguous. *)

return_stmt = "return" , [ expr ] , ";" ;

(* --- Expression statement --- *)
(* *)
(* Evaluates an expression for its side effects; the value is *)
(* discarded. The ";" is mandatory. *)
(* *)
(* LL(1): at stmt level: *)
(*   "let"      → let_stmt *)
(*   "return"   → return_stmt *)
(*   "if"       → if_stmt *)
(*   "while"    → while_stmt *)
(*   "loop"     → loop_stmt *)
(*   "break"    → break_stmt *)
(*   "continue" → continue_stmt *)
(*   "{"        → block_stmt *)
(*   other      → expr_stmt *)

expr_stmt = expr , ";" ;

(* --- If statement --- *)
(* *)
(* Conditionally executes a block. An optional "else" branch may *)
(* follow; it is either a plain block or another "if" statement, *)
(* enabling "else if" chains of arbitrary length. *)
(* *)
(* LL(1) notes: *)
(*   condition uses expr_ns — struct literals are forbidden at the *)
(*   outermost level to avoid ambiguity with the body block's "{". *)
(*   [ "else" ... ] — consume "else" iff next token is "else" *)
(*   else_branch: "if" → if_stmt (else-if); "{" → block_stmt *)
(*   The two else_branch alternatives start with distinct tokens, *)
(*   so no look-ahead conflict arises (no dangling-else ambiguity). *)

if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;

else_branch = if_stmt         (* else if *)
            | block_stmt ;    (* plain else *)

(* --- While loop --- *)
(* *)
(* Repeatedly executes the body as long as the condition is true. *)
(* The condition is re-evaluated before every iteration. *)
(* If the condition is false on the first check, the body never *)
(* executes. *)
(* *)
(* Like if_stmt, the condition uses expr_ns to prevent struct *)
(* literal ambiguity with the body block's opening "{". *)

while_stmt = "while" , expr_ns , block_stmt ;

(* --- Infinite loop --- *)
(* *)
(* Executes the body unconditionally and indefinitely. The only *)
(* ways to exit are "break" or "return" inside the body. *)

loop_stmt = "loop" , block_stmt ;

(* --- Break and continue --- *)
(* *)
(* "break" exits the immediately enclosing "while" or "loop". *)
(* "continue" skips the rest of the current iteration and jumps to *)
(* the next condition check (while) or iteration (loop). *)
(* Both are only valid inside a loop body; the compiler enforces *)
(* this as a semantic rule. *)

break_stmt = "break" , ";" ;

continue_stmt = "continue" , ";" ;

(* --- Block statement --- *)
(* *)
(* A block groups zero or more statements into a single statement *)
(* and introduces a new lexical scope. It does not produce a value. *)
(* *)
(* LL(1): at stmt level, "{" unambiguously selects block since no *)
(* other stmt alternative starts with "{". *)

block_stmt = "{" , { stmt } , "}" ;

(* --- Let statement --- *)
(* *)
(* Introduces a named binding in the current scope. *)
(* Bindings are immutable by default; "mut" opts into mutability. *)
(* *)
(* The type annotation and the initialiser are both optional, but *)
(* at least one must be present for the binding to be usable; *)
(* the compiler enforces this as a semantic (not syntactic) rule. *)
(* *)
(* LL(1) notes: *)
(*   [ "mut" ]   — consume "mut" iff the next token is "mut" *)
(*   [ ":" ... ] — consume iff next token is ":" *)
(*   [ "=" ... ] — consume iff next token is "=" *)
(*   All decision tokens are distinct, so no look-ahead conflict. *)

let_stmt = "let" , [ "mut" ] , IDENT ,
           [ ":" , type ] ,
           [ "=" , expr ] ,
           ";" ;

(* ================================================================ *)
(* Top-Level Definitions *)
(* ================================================================ *)

(* --- Function definition --- *)
(* *)
(* Defines a named function with a typed parameter list and an *)
(* optional return type. Omitting "->" implies a return type of (). *)
(* Note that "()" itself cannot be written as a `type`; the unit *)
(* return type is expressed only by omitting the "->" clause. *)
(* *)
(* LL(1) notes: *)
(*   param_list: ")" → ε (empty list); else parse first param *)
(*   param: "mut" → consume; IDENT → skip (mut absent) *)
(*   [ "->" ... ]: consume iff next token is "->" *)
(*   "->" is a single two-character token and differs from "{", *)
(*   the only token that can start the block_stmt that follows, *)
(*   so there is no look-ahead conflict. *)

func_def = "fn" , IDENT , "(" , param_list , ")" ,
           [ "->" , type ] ,
           block_stmt ;

param_list = [ param , { "," , param } ] ;

(* Each parameter is an optionally-mutable name with a required *)
(* type annotation. Mutability applies within the function body. *)

param = [ "mut" ] , IDENT , ":" , type ;

(* --- Struct definition --- *)
(* *)
(* Defines a named product type with zero or more typed fields. *)
(* Fields are separated by commas; no trailing comma is permitted. *)
(* *)
(* LL(1) notes: *)
(*   field_list: "}" → ε (empty struct); else parse first field *)
(*   FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"} *)
(*   Disjoint, so no look-ahead conflict. *)
(*   top_level_def: "fn" → func_def; "struct" → struct_def *)

struct_def = "struct" , IDENT , "{" , field_list , "}" ;

field_list = [ field , { "," , field } ] ;

field = IDENT , ":" , type ;