Initial Flux language specification

Add the LL(1) context-free grammar (GRAMMAR.ebnf), token and syntax reference (SYNTAX.md), LL(1) verification tool (ll1_check.py), and a fibonacci example demonstrating the language.
2026-03-10 14:41:54 +01:00
commit 73e36fac71
4 changed files with 1607 additions and 0 deletions
--- a/GRAMMAR.ebnf
+++ b/GRAMMAR.ebnf
@@ -0,0 +1,435 @@
 (* Flux Language Grammar — Context-Free LL(1) Grammar               *)
 (* ================================================================ *)
 (*                                                                  *)
 (* Notation (ISO/IEC 14977 EBNF):                                   *)
 (*   rule = definition ;        defines a rule (terminated by ;)    *)
 (*   a , b                      concatenation                       *)
 (*   a | b                      alternation                         *)
 (*   { a }                      zero or more repetitions of a       *)
 (*   [ a ]                      optional a  (zero or one)           *)
 (*   ( a | b )                  grouping                            *)
 (*   "literal"                  terminal string                     *)
 (*                                                                  *)
 (* UPPERCASE identifiers are lexical token classes whose value      *)
 (* cannot be expressed as a single literal (e.g. IDENT, INT_LIT).   *)
 (* They are NOT defined here — see SYNTAX.md.                       *)
 (*                                                                  *)
 (* Unique/fixed tokens are written as quoted literals directly.     *)
 (*                                                                  *)
 (* Lowercase identifiers are non-terminals (grammar productions).   *)
 (* ================================================================ *)
 (* Program (start symbol)                                           *)
 (* ================================================================ *)
 program = { top_level_def } ;
 top_level_def = func_def
              | struct_def ;
 (* ================================================================ *)
 (* Expressions                                                      *)
 (* ================================================================ *)
 expr = or_expr ;
 (* --- Logical OR (lowest-precedence binary operator) ---           *)
 (*                                                                  *)
 (* Uses keyword `or`; left-associative via iteration.               *)
 or_expr = and_expr , { "or" , and_expr } ;
 (* --- Logical AND ---                                              *)
 (*                                                                  *)
 (* Uses keyword `and`; left-associative via iteration.              *)
 and_expr = bitor_expr , { "and" , bitor_expr } ;
 (* --- Bitwise OR ---                                               *)
 bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
 (* --- Bitwise XOR ---                                              *)
 bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
 (* --- Bitwise AND ---                                              *)
 bitand_expr = additive_expr , { "&" , additive_expr } ;
 (* --- Additive: addition and subtraction ---                       *)
 additive_expr = multiplicative_expr ,
                { ( "+" | "-" ) , multiplicative_expr } ;
 (* --- Multiplicative: multiplication, division, modulo ---         *)
 multiplicative_expr = unary_expr ,
                      { ( "*" | "/" | "%" ) , unary_expr } ;
 (* --- Unary operators (prefix, right-associative by recursion) --- *)
 (*                                                                  *)
 (* "!"  logical not                                                 *)
 (* "~"  bitwise not                                                 *)
 (* "-"  arithmetic negation                                         *)
 (* "*"  dereference (pointer indirection)                           *)
 (* "&"  address-of                                                  *)
 unary_expr = "!" , unary_expr
           | "~" , unary_expr
           | "-" , unary_expr
           | "*" , unary_expr
           | "&" , unary_expr
           | postfix_expr ;
 (* --- Postfix operators (left-associative via iteration) ---       *)
 (*                                                                  *)
 (* Postfix operators bind tighter than any prefix or binary form.   *)
 (* Multiple postfix operations chain left-to-right.                 *)
 postfix_expr = primary_expr , { postfix_op } ;
 postfix_op = "." , IDENT                         (* member access   *)
           | "[" , expr , "]"                    (* subscript/index *)
           | "(" , arg_list , ")" ;              (* function call   *)
 (* --- Primary expressions (highest precedence) ---                 *)
 (*                                                                  *)
 (* LL(1) note: after IDENT, peek at the next token.                 *)
 (*   "{" → parse struct_lit_body (struct literal)                   *)
 (*   other → bare identifier reference                              *)
 primary_expr = IDENT , [ struct_lit_body ]   (* ident or struct lit *)
             | INT_LIT
             | FLOAT_LIT
             | STRING_LIT
             | CHAR_LIT
             | "true"
             | "false"
             | "(" , expr , ")" ;             (* parenthesised      *)
 (* --- Struct literal ---                                           *)
 (*                                                                  *)
 (* A struct literal constructs a value of a named struct type.      *)
 (*   IDENT "{" field: expr, ... "}"                                 *)
 (*                                                                  *)
 (* Field order need not match the struct definition order.          *)
 (* No trailing comma is permitted (consistent with struct_def).     *)
 (*                                                                  *)
 (* LL(1) notes:                                                     *)
 (*   struct_field_list: "}" → ε; IDENT → first field                *)
 (*   FIRST(struct_field) = {IDENT}                                  *) 
 (*   FOLLOW(struct_field_list) = {"}"}                              *)
 (*   Disjoint, so no look-ahead conflict.                           *)
 struct_lit_body   = "{" , struct_field_list , "}" ;
 struct_field_list = [ struct_field , { "," , struct_field } ] ;
 struct_field = IDENT , ":" , expr ;
 (* ================================================================ *)
 (* Argument List                                                    *)
 (* ================================================================ *)
 arg_list = [ expr , { "," , expr } ] ;
 (* ================================================================ *)
 (* No-Struct Expression Hierarchy (expr_ns)                         *)
 (* ================================================================ *)
 (*                                                                  *)
 (* Struct literals create an LL(1) ambiguity in if/while conditions:*)
 (*   if Point { x: 1 } { ... }                                      *)
 (* After "Point", "{" could open a struct literal OR the body block.*)
 (*                                                                  *)
 (* Solution: define expr_ns — identical to expr except              *)
 (* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
 (* Struct literals ARE still allowed when parenthesised:            *)
 (*   if (Point { x: 1 }).flag { ... }                               *)
 (*                                                                  *)
 (* if_stmt and while_stmt use expr_ns for their condition.          *)
 (* All other expression positions use the full expr.                *)
 expr_ns = or_expr_ns ;
 or_expr_ns  = and_expr_ns , { "or"  , and_expr_ns } ;
 and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
 bitor_expr_ns  = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
 bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
 bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
 additive_expr_ns = multiplicative_expr_ns ,
                   { ( "+" | "-" ) , multiplicative_expr_ns } ;
 multiplicative_expr_ns = unary_expr_ns ,
                         { ( "*" | "/" | "%" ) , unary_expr_ns } ;
 unary_expr_ns = "!" , unary_expr_ns
              | "~" , unary_expr_ns
              | "-" , unary_expr_ns
              | "*" , unary_expr_ns
              | "&" , unary_expr_ns
              | postfix_expr_ns ;
 postfix_expr_ns = primary_expr_ns , { postfix_op } ;
 (* primary_expr_ns: same as primary_expr but IDENT is never         *)
 (* followed by struct_lit_body. Note "(" , expr , ")" uses full     *)
 (* expr, so struct literals are permitted inside parentheses.       *)
 primary_expr_ns = IDENT                          (* bare ident only *)
                | INT_LIT
                | FLOAT_LIT
                | STRING_LIT
                | CHAR_LIT
                | "true"
                | "false"
                | "(" , expr , ")" ;          (* struct lit OK here *)
 (* ================================================================ *)
 (* Types                                                            *)
 (* ================================================================ *)
 type = primitive_type
     | named_type
     | pointer_type
     | array_type ;
 (* --- Primitive types ---                                          *)
 (*                                                                  *)
 (* Unsigned integers : u8  u16  u32  u64                            *)
 (* Signed integers   : i8  i16  i32  i64                            *)
 (* Floating-point    : f32  f64                                     *)
 (* Other             : bool  char                                   *)
 primitive_type = "u8"  | "u16" | "u32" | "u64"
               | "i8"  | "i16" | "i32" | "i64"
               | "f32" | "f64"
               | "bool" | "char" ;
 (* --- Named types ---                                              *)
 (*                                                                  *)
 (* A user-defined type referenced by its identifier (e.g. a struct  *)
 (* name). The lexer guarantees that all primitive-type keywords are *)
 (* reserved, so IDENT never clashes with primitive_type.            *)
 named_type = IDENT ;
 (* --- Pointer types ---                                            *)
 (*                                                                  *)
 (* "*" type    — typed pointer; the pointee type is known.          *)
 (* "*opaque"   — untyped/opaque pointer (no pointee type info).     *)
 (*                                                                  *)
 (* LL(1) note: after "*", "opaque" is not in FIRST(type), so the    *)
 (* two alternatives are always distinguishable with one token.      *)
 pointer_type = "*" , ( "opaque" | type ) ;
 (* --- Array types ---                                              *)
 (*                                                                  *)
 (* "[" type ";" INT_LIT "]"                                         *)
 (*                                                                  *)
 (* The element type and the fixed size (a non-negative integer      *)
 (* literal) are separated by ";". Sizes that are constant           *)
 (* expressions may be introduced in a later grammar revision.       *)
 array_type = "[" , type , ";" , INT_LIT , "]" ;
 (* ================================================================ *)
 (* Statements                                                       *)
 (* ================================================================ *)
 stmt = let_stmt
     | return_stmt
     | if_stmt
     | while_stmt
     | loop_stmt
     | break_stmt
     | continue_stmt
     | block_stmt
     | expr_stmt ;
 (* --- Return statement ---                                         *)
 (*                                                                  *)
 (* Exits the enclosing function, optionally yielding a value.       *)
 (* "return ;" is used when the function return type is ().          *)
 (*                                                                  *)
 (* LL(1): after "return", peek at next token.                       *)
 (*   ";" → no expression (unit return)                              *)
 (*   other → parse expr, then expect ";"                            *)
 (* ";" is not in FIRST(expr), so the two cases are unambiguous.     *)
 return_stmt = "return" , [ expr ] , ";" ;
 (* --- Expression statement ---                                     *)
 (*                                                                  *)
 (* Evaluates an expression for its side effects; the value is       *)
 (* discarded. The ";" is mandatory.                                 *)
 (*                                                                  *)
 (* LL(1): at stmt level:                                            *)
 (*   "let"      → let_stmt                                          *)
 (*   "return"   → return_stmt                                       *)
 (*   "if"       → if_stmt                                           *)
 (*   "while"    → while_stmt                                        *)
 (*   "loop"     → loop_stmt                                         *)
 (*   "break"    → break_stmt                                        *)
 (*   "continue" → continue_stmt                                     *)
 (*   "{"        → block_stmt                                        *)
 (*   other      → expr_stmt                                         *)
 expr_stmt = expr , ";" ;
 (* --- If statement ---                                             *)
 (*                                                                  *)
 (* Conditionally executes a block. An optional "else" branch may    *)
 (* follow; it is either a plain block or another "if" statement,    *)
 (* enabling "else if" chains of arbitrary length.                   *)
 (*                                                                  *)
 (* LL(1) notes:                                                     *)
 (*   condition uses expr_ns — struct literals are forbidden at the  *)
 (*   outermost level to avoid ambiguity with the body block's "{".  *)
 (*   [ "else" ... ] — consume "else" iff next token is "else"       *)
 (*   else_branch: "if" → if_stmt (else-if); "{" → block_stmt        *)
 (*   The two else_branch alternatives start with distinct tokens,   *)
 (*   so no look-ahead conflict arises. Because every branch body is *)
 (*   a braced block_stmt, there is no dangling-else ambiguity.      *)
 if_stmt     = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
 else_branch = if_stmt       (* else if *)
            | block_stmt ;  (* plain else *)
 (* --- While loop ---                                               *)
 (*                                                                  *)
 (* Repeatedly executes the body as long as the condition is true.   *)
 (* The condition is re-evaluated before every iteration.            *)
 (* If the condition is false on the first check, the body never     *)
 (* executes.                                                        *)
 (*                                                                  *)
 (* Like if_stmt, the condition uses expr_ns to prevent struct       *)
 (* literal ambiguity with the body block's opening "{".             *)
 while_stmt = "while" , expr_ns , block_stmt ;
 (* --- Infinite loop ---                                            *)
 (*                                                                  *)
 (* Executes the body unconditionally and indefinitely. The only     *)
 (* ways to exit are "break" or "return" inside the body.            *)
 loop_stmt = "loop" , block_stmt ;
 (* --- Break and continue ---                                       *)
 (*                                                                  *)
 (* "break"    exits the immediately enclosing "while" or "loop".    *)
 (* "continue" skips the rest of the current iteration and jumps to  *)
 (*            the next condition check (while) or iteration (loop). *)
 (* Both are only valid inside a loop body; the compiler enforces    *)
 (* this as a semantic rule.                                         *)
 break_stmt    = "break" , ";" ;
 continue_stmt = "continue" , ";" ;
 (* --- Block statement ---                                          *)
 (*                                                                  *)
 (* A block groups zero or more statements into a single statement   *)
 (* and introduces a new lexical scope. It does not produce a value. *)
 (*                                                                  *)
 (* LL(1): at stmt level, "{" unambiguously selects block since no   *)
 (* other stmt alternative starts with "{".                          *)
 block_stmt = "{" , { stmt } , "}" ;
 (* --- Let statement ---                                            *)
 (*                                                                  *)
 (* Introduces a named binding in the current scope.                 *)
 (* Bindings are immutable by default; "mut" opts into mutability.   *)
 (*                                                                  *)
 (* The type annotation and the initialiser are both optional, but   *)
 (* at least one must be present for the binding to be usable;       *)
 (* the compiler enforces this as a semantic (not syntactic) rule.   *)
 (*                                                                  *)
 (* LL(1) notes:                                                     *)
 (*   [ "mut" ]   — consume "mut" iff the next token is "mut"        *)
 (*   [ ":" ... ] — consume iff next token is ":"                    *)
 (*   [ "=" ... ] — consume iff next token is "="                    *)
 (*   All decision tokens are distinct, so no look-ahead conflict.   *)
 let_stmt = "let" , [ "mut" ] , IDENT ,
           [ ":" , type ] ,
           [ "=" , expr ] ,
           ";" ;
 (* ================================================================ *)
 (* Top-Level Definitions                                            *)
 (* ================================================================ *)
 (* --- Function definition ---                                      *)
 (*                                                                  *)
 (* Defines a named function with a typed parameter list and an      *)
 (* optional return type. Omitting "->" implies a return type of (). *)
 (*                                                                  *)
 (* LL(1) notes:                                                     *)
 (*   param_list: ")" → ε (empty list); else parse first param       *)
 (*   param: "mut" → consume; IDENT → skip (mut absent)              *)
 (*   [ "->" ... ]: consume iff next token is "->"                   *)
 (*   "->" is a single two-character token, distinct from "{" (the   *)
 (*   only token that can start block_stmt), so the optional return  *)
 (*   type never conflicts with the block_stmt that follows.         *)
 func_def = "fn" , IDENT , "(" , param_list , ")" ,
           [ "->" , type ] ,
           block_stmt ;
 param_list = [ param , { "," , param } ] ;
 (* Each parameter is an optionally-mutable name with a required     *)
 (* type annotation. Mutability applies within the function body.    *)
 param = [ "mut" ] , IDENT , ":" , type ;
 (* --- Struct definition ---                                        *)
 (*                                                                  *)
 (* Defines a named product type with zero or more typed fields.     *)
 (* Fields are separated by commas; no trailing comma is permitted.  *)
 (*                                                                  *)
 (* LL(1) notes:                                                     *)
 (*   field_list: "}" → ε (empty struct); else parse first field     *)
 (*   FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"}             *)
 (*   Disjoint, so no look-ahead conflict.                           *)
 (*   top_level_def: "fn" → func_def; "struct" → struct_def          *)
 struct_def = "struct" , IDENT , "{" , field_list , "}" ;
 field_list = [ field , { "," , field } ] ;
 field = IDENT , ":" , type ;
--- a/SYNTAX.md
+++ b/SYNTAX.md
@@ -0,0 +1,803 @@
 # Flux Language Syntax Reference
 ## Lexical Tokens
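 All tokens listed here are produced by the lexer (lexical analysis phase).
 Token classes whose lexeme varies (`IDENT`, `INT_LIT`, `FLOAT_LIT`,
 `STRING_LIT`, `CHAR_LIT`) appear as UPPERCASE terminals in `GRAMMAR.ebnf`;
 fixed tokens such as keywords, operators, and punctuation appear there as
 quoted literals (e.g. `"true"`, `"+"`, `"{"`).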
 ### Literals
 | Token        | Description                                                         | Examples                       |
 | ------------ | ------------------------------------------------------------------- | ------------------------------ |
 | `INT_LIT`    | Integer literal (decimal, hex `0x`, octal `0o`, binary `0b`)        | `42`, `0xFF`, `0o77`, `0b1010` |
 | `FLOAT_LIT`  | Floating-point literal                                              | `3.14`, `1.0e-9`, `0.5`        |
 | `STRING_LIT` | Double-quoted UTF-8 string, supports `\n \t \\ \"` escape sequences | `"hello\nworld"`               |
 | `CHAR_LIT`   | Single-quoted Unicode scalar value                                  | `'a'`, `'\n'`, `'\u{1F600}'`   |
 | `TRUE`       | Boolean true literal                                                | `true`                         |
 | `FALSE`      | Boolean false literal                                               | `false`                        |
 ### Identifier
 | Token   | Description                                                                                                  |
 | ------- | ------------------------------------------------------------------------------------------------------------ |
 | `IDENT` | Identifier: starts with a letter or `_`, followed by letters, digits, or `_`. Unicode letters are permitted. |
 ### Operator Tokens
 | Token     | Lexeme | Description                            |
 | --------- | ------ | -------------------------------------- |
 | `PLUS`    | `+`    | Addition (unary plus is not supported) |
 | `MINUS`   | `-`    | Subtraction / unary negation           |
 | `STAR`    | `*`    | Multiplication / pointer dereference   |
 | `SLASH`   | `/`    | Division                               |
 | `PERCENT` | `%`    | Modulo (remainder)                     |
 | `AMP`     | `&`    | Bitwise AND / address-of               |
 | `PIPE`    | `\|`   | Bitwise OR                             |
 | `CARET`   | `^`    | Bitwise XOR                            |
 | `BANG`    | `!`    | Logical NOT                            |
 | `TILDE`   | `~`    | Bitwise NOT                            |
 | `DOT`     | `.`    | Member access                          |
 ### Keyword Tokens
 #### Operator Keywords
 | Lexeme | Description |
 | ------ | ----------- |
 | `and`  | Logical AND |
 | `or`   | Logical OR  |
 #### Boolean Literals
 | Lexeme  | Description         |
 | ------- | ------------------- |
 | `true`  | Boolean true value  |
 | `false` | Boolean false value |
 #### Primitive Type Keywords
 | Lexeme | Description                    |
 | ------ | ------------------------------ |
 | `u8`   | Unsigned 8-bit integer         |
 | `u16`  | Unsigned 16-bit integer        |
 | `u32`  | Unsigned 32-bit integer        |
 | `u64`  | Unsigned 64-bit integer        |
 | `i8`   | Signed 8-bit integer           |
 | `i16`  | Signed 16-bit integer          |
 | `i32`  | Signed 32-bit integer          |
 | `i64`  | Signed 64-bit integer          |
 | `f32`  | 32-bit IEEE 754 floating-point |
 | `f64`  | 64-bit IEEE 754 floating-point |
 | `bool` | Boolean (`true` or `false`)    |
 | `char` | Unicode scalar value (32-bit)  |
 #### Pointer Keyword
 | Lexeme   | Description                                             |
 | -------- | ------------------------------------------------------- |
 | `opaque` | Used in `*opaque` to denote a pointer with no type info |
 #### Statement Keywords
 | Lexeme     | Description                           |
 | ---------- | ------------------------------------- |
 | `let`      | Introduces a variable binding         |
 | `mut`      | Makes a binding or parameter mutable  |
 | `return`   | Exits the enclosing function          |
 | `if`       | Conditional statement                 |
 | `else`     | Alternative branch of an `if`         |
 | `while`    | Condition-controlled loop             |
 | `loop`     | Infinite loop                         |
 | `break`    | Exit the immediately enclosing loop   |
 | `continue` | Skip to the next iteration of a loop  |
 #### Definition Keywords
 | Lexeme   | Description                      |
 | -------- | -------------------------------- |
 | `fn`     | Introduces a function definition |
 | `struct` | Introduces a struct definition   |
 > **Lexer note:** All keywords above are reserved and must be recognised before
 > the general `IDENT` rule. An identifier may not shadow any keyword.
 ### Delimiter / Punctuation Tokens
 | Token       | Lexeme | Description                                            |
 | ----------- | ------ | ------------------------------------------------------ |
 | `LPAREN`    | `(`    | Left parenthesis                                       |
 | `RPAREN`    | `)`    | Right parenthesis                                      |
 | `LBRACKET`  | `[`    | Left square bracket                                    |
 | `RBRACKET`  | `]`    | Right square bracket                                   |
 | `COMMA`     | `,`    | Argument / element separator                           |
 | `SEMICOLON` | `;`    | Statement terminator / array size separator (`[T; N]`) |
 | `LCURLY`    | `{`    | Block / struct body / struct literal open              |
 | `RCURLY`    | `}`    | Block / struct body / struct literal close             |
 | `ARROW`     | `->`   | Function return type separator                         |
 | `COLON`     | `:`    | Type annotation / struct literal field separator       |
 ---
 ## Expressions
 Expressions produce a value. The grammar defines them through a hierarchy of
 precedence levels — in the table below, a lower level number means lower
 precedence (binds less tightly).
 ### Operator Precedence Table
 | Level | Operators                   | Associativity  | Description                      |
 | ----- | --------------------------- | -------------- | -------------------------------- |
 | 1     | `or`                        | left           | Logical OR (lowest)              |
 | 2     | `and`                       | left           | Logical AND                      |
 | 3     | `\|`                        | left           | Bitwise OR                       |
 | 4     | `^`                         | left           | Bitwise XOR                      |
 | 5     | `&`                         | left           | Bitwise AND                      |
 | 6     | `+`  `-`                    | left           | Addition, subtraction            |
 | 7     | `*`  `/`  `%`               | left           | Multiplication, division, modulo |
 | 8     | `!`  `~`  `-`  `*`  `&`     | right (unary)  | Prefix unary operators           |
 | 9     | `.`  `[…]`  `(…)`           | left (postfix) | Member access, index, call       |
 | 10    | literals, identifiers, `()` | —              | Primary expressions (highest)    |
 ### Operator Descriptions
 #### Binary Operators
 | Operator | Name           | Example   | Notes                                        |
 | -------- | -------------- | --------- | -------------------------------------------- |
 | `or`     | Logical OR     | `a or b`  | Short-circuits; both operands must be `bool` |
 | `and`    | Logical AND    | `a and b` | Short-circuits; both operands must be `bool` |
 | `\|`     | Bitwise OR     | `a \| b`  | Integer types                                |
 | `^`      | Bitwise XOR    | `a ^ b`   | Integer types                                |
 | `&`      | Bitwise AND    | `a & b`   | Integer types (binary context)               |
 | `+`      | Addition       | `a + b`   |                                              |
 | `-`      | Subtraction    | `a - b`   |                                              |
 | `*`      | Multiplication | `a * b`   | Binary context (both operands are values)    |
 | `/`      | Division       | `a / b`   | Integer division truncates toward zero       |
 | `%`      | Modulo         | `a % b`   | Sign follows the dividend                    |
 #### Unary Prefix Operators
 | Operator | Name        | Example | Notes                                            |
 | -------- | ----------- | ------- | ------------------------------------------------ |
 | `!`      | Logical NOT | `!cond` | Operand must be `bool`                           |
 | `~`      | Bitwise NOT | `~mask` | Bitwise complement; integer types                |
 | `-`      | Negation    | `-x`    | Arithmetic negation                              |
 | `*`      | Dereference | `*ptr`  | Unary context; operand must be a pointer type    |
 | `&`      | Address-of  | `&x`    | Unary context; produces a pointer to the operand |
 #### Postfix Operators
 | Operator | Name          | Example     | Notes                                             |
 | -------- | ------------- | ----------- | ------------------------------------------------- |
 | `.`      | Member access | `obj.field` | Accesses a named field or method of a struct/type |
 | `[…]`    | Subscript     | `arr[i]`    | Indexes into an array, slice, or map              |
 | `(…)`    | Call          | `f(a, b)`   | Invokes a function or closure                     |
 > **Disambiguation:** `*` and `&` are context-sensitive.
 > When appearing as the first token of a `unary_expr` they are **unary**
 > (dereference / address-of). When appearing between two operands — after a
 > complete `unary_expr` inside `multiplicative_expr` (`*`), or after a complete
 > `additive_expr` inside `bitand_expr` (`&`) — they are **binary**
 > (multiplication / bitwise AND). The parser resolves this purely from
 > grammatical position — no look-ahead beyond 1 token is required.
 ### Parenthesised Expressions
 Any expression may be wrapped in parentheses to override default precedence:
 ```
 (a + b) * c
 ```
 ### Function Call Argument List
 Arguments are comma-separated expressions. A trailing comma is **not**
 permitted at this grammar level.
 ```
 f()
 f(x)
 f(x, y, z)
 ```
 ### Examples
 ```flux
 // Arithmetic
 a + b * c - d % 2
 // Bitwise
 flags & MASK | extra ^ toggle
 // Logical
 ready and not_done or fallback
 // Mixed unary / postfix
 *ptr.field
 &arr[i]
 !cond
 // Chained postfix
 obj.method(arg1, arg2)[0].name
 // Explicit precedence override
 (a or b) and c
 ```
 ---
 ## Types
 Types describe the shape and interpretation of values. All type positions in
 the grammar reference the `type` non-terminal.
 ### Primitive Types
 Primitive types are single-keyword types built into the language.
 | Type   | Kind             | Width  | Range / Notes                              |
 | ------ | ---------------- | ------ | ------------------------------------------ |
 | `u8`   | Unsigned integer | 8-bit  | 0 … 255                                    |
 | `u16`  | Unsigned integer | 16-bit | 0 … 65 535                                 |
 | `u32`  | Unsigned integer | 32-bit | 0 … 4 294 967 295                          |
 | `u64`  | Unsigned integer | 64-bit | 0 … 2⁶⁴ − 1                                |
 | `i8`   | Signed integer   | 8-bit  | −128 … 127                                 |
 | `i16`  | Signed integer   | 16-bit | −32 768 … 32 767                           |
 | `i32`  | Signed integer   | 32-bit | −2 147 483 648 … 2 147 483 647             |
 | `i64`  | Signed integer   | 64-bit | −2⁶³ … 2⁶³ − 1                             |
 | `f32`  | Floating-point   | 32-bit | IEEE 754 single precision                  |
 | `f64`  | Floating-point   | 64-bit | IEEE 754 double precision                  |
 | `bool` | Boolean          | 1 byte | `true` or `false`                          |
 | `char` | Unicode scalar   | 32-bit | Any Unicode scalar value (not a surrogate) |
 ### Named Types
 A named type is any user-defined type referenced by its identifier — typically a struct name. Because all primitive-type keywords (`u8`, `bool`, etc.) are reserved, an `IDENT` in type position is always a named type, never a primitive.
 ```flux
 Point        // struct Point { x: f32, y: f32 }
 Node         // struct Node { value: i64, next: *Node }
 *Point       // pointer to a named type
 [Node; 8]    // array of a named type
 ```
 ### Pointer Types
 A pointer type is written with a leading `*`.
 | Syntax    | Description                                                                           |
 | --------- | ------------------------------------------------------------------------------------- |
 | `*T`      | Typed pointer — points to a value of type `T`                                         |
 | `*opaque` | Opaque pointer — no compile-time pointee type information; equivalent to C's `void *` |
 Pointer types may be nested: `**u8` is a pointer to a pointer to `u8`.
 ```flux
 *u8          // pointer to u8
 **i32        // pointer to pointer to i32
 *opaque      // untyped pointer
 **opaque     // pointer to untyped pointer
 ```
 ### Array Types
 Arrays have a fixed size known at compile time.
 ```
 [ <element-type> ; <size> ]
 ```
 `<size>` must be a non-negative integer literal (`INT_LIT`). The element type
 may itself be any `type`, including pointers or nested arrays.
 ```flux
 [u8; 256]          // array of 256 u8 values
 [*u8; 4]           // array of 4 pointers to u8
 [[f32; 3]; 3]      // 3×3 matrix of f32 (array of arrays)
 [*opaque; 8]       // array of 8 opaque pointers
 ```
 ### Type Grammar Summary
 ```ebnf
 type           = primitive_type | named_type | pointer_type | array_type ;
 primitive_type = "u8" | "u16" | "u32" | "u64"
               | "i8" | "i16" | "i32" | "i64"
               | "f32" | "f64" | "bool" | "char" ;
 named_type     = IDENT ;
 pointer_type   = "*" , ( "opaque" | type ) ;
 array_type     = "[" , type , ";" , INT_LIT , "]" ;
 ```
 ---
 ## Struct Literals
 A struct literal constructs a value of a named struct type by providing values for each field.
 ```
 <TypeName> { <field>: <expr>, ... }
 ```
 Fields may appear in any order and need not match the declaration order. No trailing comma is permitted.
 ### Examples
 ```flux
 let p = Point { x: 1.0, y: 2.0 };
 let n = Node {
    value: 42,
    next: get_next()
 };
 // Nested struct literal
 let outer = Rect {
    origin: Point { x: 0.0, y: 0.0 },
    size: Point { x: 10.0, y: 5.0 }
 };
 // Empty struct
 let u = Unit {};
 ```
 ### Struct Literals in Conditions
 Struct literals are **not permitted** as the outermost expression in `if` and `while` conditions. This restriction exists because `{` after the condition is ambiguous — it could start a struct literal body or the statement block.
 ```flux
 // ERROR — ambiguous: is `{` a struct body or the if block?
 if Flags { verbose: true } { ... }
 // OK — parentheses resolve the ambiguity
 if (Flags { verbose: true }).verbose { ... }
 ```
 The grammar enforces this through the `expr_ns` (no-struct) hierarchy used in condition positions. Struct literals remain valid everywhere else: `let`, `return`, function arguments, field values, etc.
 ### Struct Literal Grammar Summary
 ```ebnf
 primary_expr      = IDENT , [ struct_lit_body ] | INT_LIT | FLOAT_LIT
                  | STRING_LIT | CHAR_LIT | "true" | "false"
                  | "(" , expr , ")" ;
 struct_lit_body   = "{" , struct_field_list , "}" ;
 struct_field_list = [ struct_field , { "," , struct_field } ] ;
 struct_field      = IDENT , ":" , expr ;
 ```
 ### No-Struct Expression (`expr_ns`)
 `expr_ns` is a parallel expression hierarchy identical to `expr` except its primary level (`primary_expr_ns`) does not allow the `struct_lit_body` suffix after an `IDENT`. Struct literals are still permitted when enclosed in parentheses (`"(" , expr , ")"`), because the `(` unambiguously marks the start of a grouped expression.
 `if_stmt` and `while_stmt` use `expr_ns` for their condition; all other expression positions use the full `expr`.
 ---
 ## Statements
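 Statements perform an action and do not produce a value. Simple statements
 (`let`, `return`, `break`, `continue`, and expression statements) are
 terminated by a semicolon `;`. Statements that end in a block (`if`, `while`,
 `loop`, and a bare block) end with the block's closing `}` and take no
 semicolon.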
 ### Let Statement
 Introduces a new named binding in the current scope.
 ```
 let [mut] <name> [: <type>] [= <expr>] ;
 ```
 | Part       | Required | Description                                   |
 | ---------- | -------- | --------------------------------------------- |
 | `mut`      | no       | Makes the binding mutable; omit for immutable |
 | `<name>`   | yes      | The identifier being bound                    |
 | `: <type>` | no       | Explicit type annotation                      |
 | `= <expr>` | no       | Initialiser expression                        |
 | `;`        | yes      | Statement terminator                          |
 Bindings are **immutable by default**. Attempting to assign to a binding
 declared without `mut` is a compile-time error.
 At least one of the type annotation or the initialiser must be present so the
 compiler can determine the binding's type. This is a semantic constraint, not a
 syntactic one — the grammar permits bare `let x;` and the type checker rejects
 it if no type can be inferred from context.
 #### Examples
 ```flux
 // Immutable, type inferred from initialiser
 let x = 42;
 // Immutable, explicit type
 let y: f64 = 3.14;
 // Mutable, type inferred
 let mut count = 0;
 // Mutable, explicit type, no initialiser (must be assigned before use)
 let mut buf: [u8; 128];
 // Mutable pointer to u32
 let mut ptr: *u32 = &value;
 // Shadowing a previous binding is allowed
 let x = "hello";   // x is now a string, previous x is gone
 ```
 ### Return Statement
 Exits the enclosing function immediately, optionally producing a return value.
 ```
 return [<expr>] ;
 ```
 `return;` (no expression) is used when the function's return type is the unit
 type `()`. `return <expr>;` returns the value of the expression.
 Because blocks do not produce a value, there is no implicit return value: a
 function with a non-unit return type must return its result with an explicit
 `return <expr>;`. A function returning `()` may simply run to the end of its
 body block.
 ```flux
 return;               // unit return
 return 42;            // return an integer
 return x * 2 + 1;    // return an expression
 ```
 ### Expression Statement
 Evaluates an expression for its side effects; the resulting value is
 discarded. A semicolon is required.
 ```
 <expr> ;
 ```
 ```flux
 do_something(x);    // call for side effects
 count + 1;          // legal but silly — value discarded
 ```
 ### Statement Grammar Summary
 ```ebnf
 stmt          = let_stmt | return_stmt | if_stmt
              | while_stmt | loop_stmt | break_stmt | continue_stmt
              | block_stmt | expr_stmt ;
 let_stmt      = "let" , [ "mut" ] , IDENT , [ ":" , type ] , [ "=" , expr ] , ";" ;
 return_stmt   = "return" , [ expr ] , ";" ;
 if_stmt       = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
 else_branch   = if_stmt | block_stmt ;
 while_stmt    = "while" , expr_ns , block_stmt ;
 loop_stmt     = "loop" , block_stmt ;
 break_stmt    = "break" , ";" ;
 continue_stmt = "continue" , ";" ;
 block_stmt    = "{" , { stmt } , "}" ;
 expr_stmt     = expr , ";" ;
 ```
 ---
 ## If Statement
 Conditionally executes a block based on a boolean expression.
 ```
 if <cond> <block> [else <else-branch>]
 ```
 The condition `<cond>` must be an expression of type `bool`. The body is
 always a `block_stmt` — braces are mandatory.
 ### Else Branch
 The optional `else` branch is either a plain block or another `if` statement,
 enabling `else if` chains of arbitrary length.
 ```flux
 if x > 0 {
    pos();
 }
 if x > 0 {
    pos();
 } else {
    non_pos();
 }
 if x > 0 {
    pos();
 } else if x < 0 {
    neg();
 } else {
    zero();
 }
 ```
 ### If Statement Grammar Summary
 ```ebnf
 if_stmt     = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
 else_branch = if_stmt | block_stmt ;
 ```
 ---
 ## While Loop
 Repeatedly executes a block as long as a boolean condition holds. The
 condition is tested before each iteration; if it is false on entry, the body
 never runs.
 ```
 while <cond> <block>
 ```
 ```flux
 let mut i = 0;
 while i < 10 {
    process(i);
    i = i + 1;
 }
 ```
 ### While Loop Grammar Summary
 ```ebnf
 while_stmt = "while" , expr_ns , block_stmt ;
 ```
 ---
 ## Loop
 Executes a block unconditionally and indefinitely. The loop runs until a
 `break` or `return` inside the body transfers control out.
 ```
 loop <block>
 ```
 ```flux
 loop {
    let msg = recv();
    if msg.is_quit() {
        break;
    }
    handle(msg);
 }
 ```
 ### Loop Grammar Summary
 ```ebnf
 loop_stmt = "loop" , block_stmt ;
 ```
 ---
 ## Break and Continue
 `break` and `continue` are only valid inside the body of a `while` or `loop`.
 The compiler enforces this as a semantic rule.
 | Statement    | Effect                                                         |
 | ------------ | -------------------------------------------------------------- |
 | `break ;`    | Exits the innermost enclosing loop immediately                 |
 | `continue ;` | Skips the rest of the current iteration; jumps to the next one |
 For `while`, `continue` jumps back to the condition check. For `loop`,
 `continue` jumps back to the top of the body.
 ```flux
 let mut i = 0;
 while i < 20 {
    i = i + 1;
    if i % 2 == 0 {
        continue;   // skip even numbers
    }
    if i > 15 {
        break;      // stop after 15
    }
    process(i);
 }
 ```
 ### Break / Continue Grammar Summary
 ```ebnf
 break_stmt    = "break" , ";" ;
 continue_stmt = "continue" , ";" ;
 ```
 ---
 ## Block Statement
 A block groups zero or more statements into a single statement and introduces
 a new lexical scope. Blocks do not produce a value.
 ```
 { <stmt>* }
 ```
 ### Scoping
 Bindings declared inside a block are not visible outside it. A binding in an
 inner scope may shadow a name from an outer scope without affecting it.
 ```flux
 let x = 1;
 {
    let x = 2;   // shadows outer x inside this block only
    f(x);        // uses 2
 }
 // x is still 1 here
 ```
 ### Nesting
 Blocks may be nested freely to any depth.
 ```flux
 {
    let a = compute_a();
    {
        let b = compute_b();
        use(a, b);
    }
    // b is no longer in scope here
 }
 ```
 ### Block Grammar Summary
 ```ebnf
 block_stmt = "{" , { stmt } , "}" ;
 ```
 ---
 ## Top-Level Definitions
 A Flux source file is a sequence of top-level definitions.
 ```ebnf
 program       = { top_level_def } ;
 top_level_def = func_def | struct_def ;
 ```
 The leading token unambiguously selects the definition kind: `fn` → function,
 `struct` → struct.
 ---
 ## Function Definition
 Defines a named, callable function.
 ```
 fn <name> ( [<params>] ) [-> <return-type>] <block>
 ```
 | Part               | Required | Description                                              |
 | ------------------ | -------- | -------------------------------------------------------- |
 | `<name>`           | yes      | The function's identifier                                |
 | `( [<params>] )`   | yes      | Comma-separated parameter list, may be empty             |
 | `-> <return-type>` | no       | Return type; omitting it means the function returns `()` |
 | `<block>`          | yes      | Function body — a `block_stmt`                           |
 ### Parameters
 Each parameter is a name with a mandatory type annotation. Parameters are
 immutable by default; `mut` makes the local binding mutable within the body.
 ```
 [mut] <name> : <type>
 ```
 ```flux
 fn add(a: i32, b: i32) -> i32 {
    return a + b;
 }
 fn greet(name: *u8) {
    print(name);
 }
 fn increment(mut x: i32) -> i32 {
    x = x + 1;
    return x;
 }
 fn apply(f: *opaque, mut buf: [u8; 64]) -> bool {
    return call(f, &buf);
 }
 ```
 ### Return Type
 If `->` is omitted the return type is implicitly `()` (the unit type). An
 explicit `-> ()` is also permitted but redundant.
 ```flux
 fn do_work() {          // returns ()
    side_effect();
 }
 fn get_value() -> i64 { // returns i64
    return 42;
 }
 ```
 ### Function Definition Grammar Summary
 ```ebnf
 func_def   = "fn" , IDENT , "(" , param_list , ")" , [ "->" , type ] , block_stmt ;
 param_list = [ param , { "," , param } ] ;
 param      = [ "mut" ] , IDENT , ":" , type ;
 ```
 ---
 ## Struct Definition
 Defines a named product type with zero or more typed fields.
 ```
 struct <name> {
    <field>: <type>,
    ...
 }
 ```
 Fields are separated by commas. No trailing comma is permitted. An empty
 struct (zero fields) is valid.
 ### Fields
 Each field is a name and a type. Fields may be of any type including pointers,
 arrays, and other structs. Field names must be unique within the struct.
 ```flux
 struct Point {
    x: f32,
    y: f32
 }
 struct Node {
    value: i64,
    next: *Node
 }
 struct Buffer {
    data: *u8,
    len: u64,
    cap: u64
 }
 struct Unit {}
 ```
 ### Member Access
 Fields of a struct value are accessed with the `.` operator (defined in the
 expression grammar). If the value is behind a pointer, dereference it first
 with `*`.
 ```flux
 let p: Point = make_point();
 let x = p.x;
 let ptr: *Point = get_point_ptr();
 let y = (*ptr).y;
 ```
 ### Struct Definition Grammar Summary
 ```ebnf
 struct_def = "struct" , IDENT , "{" , field_list , "}" ;
 field_list = [ field , { "," , field } ] ;
 field      = IDENT , ":" , type ;
 ```
--- a/examples/fibonacci.flx
+++ b/examples/fibonacci.flx
@@ -0,0 +1,7 @@
 // Computes the n-th Fibonacci number by naive double recursion.
 // NOTE(review): `n` is a `u8` but the function returns `u64`; `return n;`
 // presumably relies on an implicit widening conversion — confirm against
 // the type rules.
 fn fibonacci(n: u8) -> u64 {
    if n < 2 {
        return n;   // base cases: n is 0 or 1, and the result equals n
    }
    return fibonacci(n - 1) + fibonacci(n - 2);
 }
--- a/ll1_check.py
+++ b/ll1_check.py
@@ -0,0 +1,362 @@
 #!/usr/bin/env python3
 """
 ll1_check.py — Parse GRAMMAR.ebnf and verify the LL(1) property.
 Usage:  python ll1_check.py [grammar_file]  [-v]
 Algorithm
 ---------
 1. Strip (* … *) comments; tokenise.
 2. Parse ISO/IEC 14977 EBNF into an AST.
 3. Normalise to plain BNF by introducing fresh helper non-terminals:
     { body }  →  _repN   where  _repN = body , _repN | ε
     [ body ]  →  _optN   where  _optN = body | ε
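     ( body )  →  inlined, except an alternation nested inside a sequence,
                  which becomes  _grpN  where  _grpN = alt1 | alt2 | …
                  (avoids cross-product productions sharing a common prefix)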
 4. Compute FIRST and FOLLOW sets (fixed-point iteration).
 5. For each non-terminal compute PREDICT sets; flag pairwise conflicts.
 """
 import re
 import sys
 from collections import defaultdict
 from itertools import count as _count
 from pathlib import Path
 # Marker placed in FIRST sets to record that a symbol or sequence can derive
 # the empty string; also used when printing an empty production.
 EPSILON = 'ε'
 START   = 'program'          # grammar start symbol; its FOLLOW set is seeded with '$'
 # ═══════════════════════════════════════════════════════════════ 1. Tokenise
 # One alternative per token kind; alternatives are tried left to right.
 _TOK = re.compile(
    r'"(?:[^"\\]|\\.)*"'    # double-quoted terminal, backslash escapes allowed
    r'|[A-Z][A-Z0-9_]*'    # UPPERCASE name  → lexical token class (terminal)
    r'|[a-z][a-z0-9_]*'    # lowercase name  → non-terminal
    r'|[=;,|()\[\]{}]'     # one-character EBNF punctuation
 )
 def tokenise(src: str) -> list:
    """
    Split EBNF source text into a flat list of token strings.
    (* … *) comments, which may span several lines, are replaced by a single
    space before scanning.  Any text that matches none of the _TOK
    alternatives is skipped without raising an error.
    """
    without_comments = re.compile(r'\(\*.*?\*\)', re.DOTALL).sub(' ', src)
    return [match.group(0) for match in _TOK.finditer(without_comments)]
 # ═══════════════════════════════════════════════════════════════ 2. Parse EBNF → AST
 #
 # Each AST node is a tuple:
 #   ('lit', s)       terminal  — quoted string "…" or UPPERCASE token class
 #   ('nt',  s)       non-terminal reference
 #   ('seq', [...])   concatenation  (A , B , C)
 #   ('alt', [...])   alternation    (A | B | C)
 #   ('opt', node)    optional       [ … ]
 #   ('rep', node)    repetition     { … }
 class _Parser:
    def __init__(self, tokens):
        self._t = tokens
        self._i = 0
    def _peek(self):
        return self._t[self._i] if self._i < len(self._t) else None
    def _eat(self, expected=None):
        v = self._t[self._i]; self._i += 1
        if expected and v != expected:
            raise SyntaxError(f'expected {expected!r}, got {v!r} '
                              f'(token #{self._i - 1})')
        return v
    def parse_grammar(self) -> dict:
        rules = {}
        while self._i < len(self._t):
            name = self._eat()
            self._eat('=')
            rules[name] = self._body()
            self._eat(';')
        return rules
    def _body(self):
        alts = [self._seq()]
        while self._peek() == '|':
            self._eat()
            alts.append(self._seq())
        return alts[0] if len(alts) == 1 else ('alt', alts)
    def _seq(self):
        items = [self._atom()]
        while self._peek() == ',':
            self._eat()
            items.append(self._atom())
        return items[0] if len(items) == 1 else ('seq', items)
    def _atom(self):
        t = self._peek()
        if t == '[':
            self._eat(); b = self._body(); self._eat(']')
            return ('opt', b)
        if t == '{':
            self._eat(); b = self._body(); self._eat('}')
            return ('rep', b)
        if t == '(':
            self._eat(); b = self._body(); self._eat(')')
            return b                         # group — return inner node directly
        if t and (t[0] == '"' or t[0].isupper()):
            return ('lit', self._eat())
        if t and t[0].islower():
            return ('nt',  self._eat())
        raise SyntaxError(f'unexpected token {t!r}')
 # ═══════════════════════════════════════════════════════════════ 3. Normalise
 def normalise(ebnf: dict) -> tuple:
    """
    Lower an EBNF AST (rule name → node) to plain BNF.
    Returns
    -------
    bnf     : dict[name → list[list[str]]]
                One inner list per production; an empty list is the ε production.
    origins : dict[helper_name → parent_rule_name]
                Records, for every generated helper, the named rule whose body
                caused it to be created.
    Helpers are named _grpN / _optN / _repN, with N drawn from one counter
    shared by all three kinds.
    """
    productions: dict = {}
    parent_of:   dict = {}
    serial = _count()
    def fresh(tag: str, rule: str) -> str:
        # Allocate the next helper name and remember which rule it belongs to.
        helper = '_' + tag + str(next(serial))
        parent_of[helper] = rule
        return helper
    def expand(node, rule: str, in_seq: bool = False) -> list:
        """
        Return the alternative symbol sequences that `node` stands for.
        in_seq: True when `node` is a direct member of a sequence.  In that
                position an alternation is hidden behind a fresh _grpN helper
                rather than multiplied out, so  A , (B | C) , D  stays a single
                production instead of becoming two productions that both begin
                with A (which would be reported as a spurious conflict).
        """
        kind = node[0]
        if kind in ('lit', 'nt'):
            # Terminals and non-terminal references are a single symbol.
            return [[node[1]]]
        if kind == 'seq':
            # Concatenate members left to right; each member is expanded in
            # sequence context so nested alternations become helpers.
            combos = [[]]
            for member in node[1]:
                tails = expand(member, rule, in_seq=True)
                combos = [head + tail for head in combos for tail in tails]
            return combos
        if kind == 'alt':
            # The helper name (if any) is reserved before the branches are
            # expanded so that numbering follows source order.
            helper = fresh('grp', rule) if in_seq else None
            flattened = []
            for branch in node[1]:
                flattened.extend(expand(branch, rule, in_seq=False))
            if helper is None:
                # Alternation at the top of a body: its branches are the
                # productions themselves.
                return flattened
            productions[helper] = flattened
            return [[helper]]
        if kind == 'opt':
            # [ body ]  →  _optN = body | ε
            helper = fresh('opt', rule)
            productions[helper] = [*expand(node[1], rule), []]
            return [[helper]]
        if kind == 'rep':
            # { body }  →  _repN = body , _repN | ε   (right-recursive)
            helper = fresh('rep', rule)
            looped = [body + [helper] for body in expand(node[1], rule)]
            looped.append([])
            productions[helper] = looped
            return [[helper]]
        raise ValueError(f'unknown AST tag {kind!r}')
    for rule_name in ebnf:
        productions[rule_name] = expand(ebnf[rule_name], rule_name)
    return productions, parent_of
 # ═══════════════════════════════════════════════════════════════ 4. FIRST / FOLLOW
 def first_of_seq(seq: list, first: dict, bnf: dict) -> set:
    """
    Compute FIRST of a sequence of grammar symbols.
    A symbol that is not a key of `bnf` is treated as a terminal.  The result
    is a new set of terminal strings; EPSILON is included only when every
    symbol of `seq` is nullable (which includes the empty sequence).
    """
    collected = set()
    for symbol in seq:
        if symbol in bnf:
            symbol_first = first[symbol]
            collected.update(t for t in symbol_first if t != EPSILON)
            if EPSILON in symbol_first:
                continue            # nullable: the next symbol can also start seq
        else:
            collected.add(symbol)   # a terminal begins the sequence and ends the scan
        break
    else:
        # The loop ran to completion, so every symbol could vanish.
        collected.add(EPSILON)
    return collected
 def compute_first(bnf: dict) -> dict:
    """
    Compute FIRST for every non-terminal of `bnf` by fixed-point iteration.
    Returns a defaultdict mapping non-terminal name → set of terminals
    (plus EPSILON when the non-terminal is nullable).
    """
    first = defaultdict(set)
    while True:
        grew = False
        for name, prods in bnf.items():
            for prod in prods:
                candidate = first_of_seq(prod, first, bnf)
                known = first[name]
                if candidate - known:
                    known.update(candidate)
                    grew = True
        if not grew:
            # A full pass added nothing: the sets are final.
            return first
 def compute_follow(bnf: dict, first: dict, start: str) -> dict:
    """
    Compute FOLLOW for every non-terminal of `bnf` by fixed-point iteration.
    FOLLOW(start) is seeded with the end-of-input marker '$'.  Returns a
    defaultdict mapping non-terminal name → set of terminals.
    """
    follow = defaultdict(set)
    follow[start].add('$')
    stable = False
    while not stable:
        stable = True
        for name, prods in bnf.items():
            for prod in prods:
                for pos, sym in enumerate(prod):
                    if sym not in bnf:
                        continue                    # terminals have no FOLLOW set
                    target = follow[sym]
                    # Whatever can begin the remainder of the production can
                    # follow sym.
                    tail_first = first_of_seq(prod[pos + 1:], first, bnf)
                    additions = tail_first - {EPSILON}
                    if EPSILON in tail_first:
                        # The remainder can vanish, so anything following the
                        # left-hand side can follow sym too.
                        additions = additions | follow[name]
                    if not additions <= target:
                        target |= additions
                        stable = False
    return follow
 # ═══════════════════════════════════════════════════════════════ 5. LL(1) check
 def predict_set(prod: list, name: str, first: dict, follow: dict, bnf: dict) -> set:
    """
    Return the look-ahead terminals that select production  name → prod :
    PREDICT = (FIRST(prod) − {ε})  ∪  (FOLLOW(name) if ε ∈ FIRST(prod))
    """
    lookahead = first_of_seq(prod, first, bnf)
    if EPSILON in lookahead:
        # A nullable production is also chosen on anything that may follow name.
        return (lookahead - {EPSILON}) | follow[name]
    return lookahead
 def check_ll1(bnf: dict, first: dict, follow: dict) -> list:
    """
    Verify that the PREDICT sets of each non-terminal's productions are
    pairwise disjoint.
    Returns one dict per overlapping pair with keys 'rule', 'prod_i',
    'prod_j' and 'conflict' (the sorted shared terminals); the list is empty
    when no overlap exists.
    """
    conflicts = []
    for name, prods in bnf.items():
        predicts = [predict_set(p, name, first, follow, bnf) for p in prods]
        for i, left in enumerate(predicts):
            # Compare only with later productions so each pair is seen once.
            for j, right in enumerate(predicts[i + 1:], start=i + 1):
                shared = left & right
                if not shared:
                    continue
                conflicts.append({
                    'rule':     name,
                    'prod_i':   prods[i],
                    'prod_j':   prods[j],
                    'conflict': sorted(shared),
                })
    return conflicts
 # ═══════════════════════════════════════════════════════════════ 6. Main
 def _fmt_prod(prod: list) -> str:
    """Render a production as space-separated symbols, or EPSILON if it is empty."""
    if prod:
        return ' '.join(prod)
    return EPSILON
 def main():
    """
    Command-line entry point: check a grammar file for the LL(1) property.
    Arguments are read from sys.argv: an optional grammar path (default
    GRAMMAR.ebnf) and an optional -v flag that also prints the FIRST and
    FOLLOW sets of the named rules.
    Exit status: 0 when the grammar is LL(1); non-zero when conflicts are
    found, the file cannot be read, or the EBNF cannot be parsed — so the
    tool can gate a build or CI step.
    """
    argv      = sys.argv[1:]
    verbose   = '-v' in argv
    positional = [a for a in argv if not a.startswith('-')]
    path      = Path(positional[0]) if positional else Path('GRAMMAR.ebnf')
    # ── Load & parse ──────────────────────────────────────────────────────
    print(f'Checking {path} …')
    try:
        src = path.read_text(encoding='utf-8')
    except FileNotFoundError:
        sys.exit(f'error: file not found: {path}')
    except (OSError, UnicodeDecodeError) as exc:
        # Directory, permission problem, or not UTF-8: report it instead of
        # dumping a traceback.
        sys.exit(f'error: cannot read {path}: {exc}')
    toks = tokenise(src)
    try:
        ebnf = _Parser(toks).parse_grammar()
    except SyntaxError as exc:
        sys.exit(f'EBNF parse error: {exc}')
    bnf, origins = normalise(ebnf)
    # ── Sanity warnings (stderr, so normal output is unchanged) ───────────
    # A lowercase symbol without a rule is silently treated as a terminal by
    # the FIRST/FOLLOW computation, which can hide real conflicts.
    undefined = sorted({sym for prods in bnf.values() for prod in prods
                        for sym in prod
                        if sym not in bnf and sym[:1].islower()})
    if undefined:
        print('warning: non-terminal(s) referenced but never defined '
              f'(treated as terminals): {", ".join(undefined)}',
              file=sys.stderr)
    if START not in bnf:
        print(f'warning: start symbol {START!r} is not defined in {path}',
              file=sys.stderr)
    first  = compute_first(bnf)
    follow = compute_follow(bnf, first, START)
    errors = check_ll1(bnf, first, follow)
    # ── Summary line ──────────────────────────────────────────────────────
    named   = sorted(n for n in bnf if not n.startswith('_'))
    helpers = sorted(n for n in bnf if     n.startswith('_'))
    print(f'  {len(named)} named rules, {len(helpers)} generated helper rules\n')
    # ── Optional verbose output ───────────────────────────────────────────
    if verbose:
        col = max((len(n) for n in named), default=0) + 2
        print('── FIRST sets (named rules) ──────────────────────────────')
        for n in named:
            syms     = sorted(first[n] - {EPSILON})
            nullable = '  [nullable]' if EPSILON in first[n] else ''
            print(f'  FIRST({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}{nullable}')
        print()
        print('── FOLLOW sets (named rules) ─────────────────────────────')
        for n in named:
            syms = sorted(follow[n])
            print(f'  FOLLOW({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}')
        print()
    # ── LL(1) result ──────────────────────────────────────────────────────
    named_err  = [e for e in errors if not e['rule'].startswith('_')]
    helper_err = [e for e in errors if     e['rule'].startswith('_')]
    if not errors:
        print('✓  Grammar is LL(1) — no conflicts detected.')
        return
    print(f'✗  {len(errors)} conflict(s): '
          f'{len(named_err)} in named rules, '
          f'{len(helper_err)} in generated helpers\n')
    for e in named_err:
        print(f'  Rule [{e["rule"]}]')
        print(f'    alt A : {_fmt_prod(e["prod_i"])}')
        print(f'    alt B : {_fmt_prod(e["prod_j"])}')
        print(f'    ambiguous token(s): {e["conflict"]}\n')
    if helper_err:
        print('  Conflicts in generated helpers '
              '(each is linked back to its enclosing named rule):')
        for e in helper_err:
            orig = origins.get(e['rule'], '?')
            print(f'  [{e["rule"]}]  ← from rule [{orig}]')
            print(f'    alt A : {_fmt_prod(e["prod_i"])}')
            print(f'    alt B : {_fmt_prod(e["prod_j"])}')
            print(f'    ambiguous token(s): {e["conflict"]}\n')
    # Signal failure to the caller; previously the process exited 0 even
    # though conflicts had been reported.
    sys.exit(1)
 if __name__ == '__main__':
    main()