From 73e36fac711a9ea27264ddf8ef84bccd6e24d6dd Mon Sep 17 00:00:00 2001
From: Jooris Hadeler <jooris@hadeler.me>
Date: Tue, 10 Mar 2026 14:41:54 +0100
Subject: [PATCH] Initial Flux language specification

Add the LL(1) context-free grammar (GRAMMAR.ebnf), token and syntax
reference (SYNTAX.md), LL(1) verification tool (ll1_check.py), and a
fibonacci example demonstrating the language.
---
 GRAMMAR.ebnf           | 435 ++++++++++++++++++++++
 SYNTAX.md              | 803 +++++++++++++++++++++++++++++++++++++++++
 examples/fibonacci.flx |   7 +
 ll1_check.py           | 362 +++++++++++++++++++
 4 files changed, 1607 insertions(+)
 create mode 100644 GRAMMAR.ebnf
 create mode 100644 SYNTAX.md
 create mode 100644 examples/fibonacci.flx
 create mode 100644 ll1_check.py

diff --git a/GRAMMAR.ebnf b/GRAMMAR.ebnf
new file mode 100644
index 0000000..9690b3d
--- /dev/null
+++ b/GRAMMAR.ebnf
@@ -0,0 +1,435 @@
+(* Flux Language Grammar — Context-Free LL(1) Grammar               *)
+(* ================================================================ *)
+(*                                                                  *)
+(* Notation (ISO/IEC 14977 EBNF):                                   *)
+(*   rule = definition ;        defines a rule (terminated by ;)    *)
+(*   a , b                      concatenation                       *)
+(*   a | b                      alternation                         *)
+(*   { a }                      zero or more repetitions of a       *)
+(*   [ a ]                      optional a  (zero or one)           *)
+(*   ( a | b )                  grouping                            *)
+(*   "literal"                  terminal string                     *)
+(*                                                                  *)
+(* UPPERCASE identifiers are lexical token classes whose value      *)
+(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT).   *)
+(* They are NOT defined here — see SYNTAX.md.                       *)
+(*                                                                  *)
+(* Unique/fixed tokens are written as quoted literals directly.     *)
+(*                                                                  *)
+(* Lowercase identifiers are non-terminals (grammar productions).   *)
+
+
+(* ================================================================ *)
+(* Program (start symbol)                                           *)
+(* ================================================================ *)
+
+program = { top_level_def } ;
+
+top_level_def = func_def
+              | struct_def ;
+
+
+(* ================================================================ *)
+(* Expressions                                                      *)
+(* ================================================================ *)
+
+expr = or_expr ;
+
+
+(* --- Logical OR (lowest-precedence binary operator) ---           *)
+(*                                                                  *)
+(* Uses keyword `or`; left-associative via iteration.               *)
+
+or_expr = and_expr , { "or" , and_expr } ;
+
+
+(* --- Logical AND ---                                              *)
+(*                                                                  *)
+(* Uses keyword `and`; left-associative via iteration.              *)
+
+and_expr = bitor_expr , { "and" , bitor_expr } ;
+
+
+(* --- Bitwise OR ---                                               *)
+
+bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
+
+
+(* --- Bitwise XOR ---                                              *)
+
+bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
+
+
+(* --- Bitwise AND ---                                              *)
+
+bitand_expr = additive_expr , { "&" , additive_expr } ;
+
+
+(* --- Additive: addition and subtraction ---                       *)
+
+additive_expr = multiplicative_expr ,
+                { ( "+" | "-" ) , multiplicative_expr } ;
+
+
+(* --- Multiplicative: multiplication, division, modulo ---         *)
+
+multiplicative_expr = unary_expr ,
+                      { ( "*" | "/" | "%" ) , unary_expr } ;
+
+
+(* --- Unary operators (prefix, right-associative by recursion) --- *)
+(*                                                                  *)
+(* "!"  logical not                                                 *)
+(* "~"  bitwise not                                                 *)
+(* "-"  arithmetic negation                                         *)
+(* "*"  dereference (pointer indirection)                           *)
+(* "&"  address-of                                                  *)
+
+unary_expr = "!" , unary_expr
+           | "~" , unary_expr
+           | "-" , unary_expr
+           | "*" , unary_expr
+           | "&" , unary_expr
+           | postfix_expr ;
+
+
+(* --- Postfix operators (left-associative via iteration) ---       *)
+(*                                                                  *)
+(* Postfix operators bind tighter than any prefix or binary form.   *)
+(* Multiple postfix operations chain left-to-right.                 *)
+
+postfix_expr = primary_expr , { postfix_op } ;
+
+postfix_op = "." , IDENT                         (* member access   *)
+           | "[" , expr , "]"                    (* subscript/index *)
+           | "(" , arg_list , ")" ;              (* function call   *)
+
+
+(* --- Primary expressions (highest precedence) ---                 *)
+(*                                                                  *)
+(* LL(1) note: after IDENT, peek at the next token.                 *)
+(*   "{" → parse struct_lit_body (struct literal)                   *)
+(*   other → bare identifier reference                              *)
+
+primary_expr = IDENT , [ struct_lit_body ]   (* ident or struct lit *)
+             | INT_LIT
+             | FLOAT_LIT
+             | STRING_LIT
+             | CHAR_LIT
+             | "true"
+             | "false"
+             | "(" , expr , ")" ;             (* parenthesised      *)
+
+
+(* --- Struct literal ---                                           *)
+(*                                                                  *)
+(* A struct literal constructs a value of a named struct type.      *)
+(*   IDENT "{" field: expr, ... "}"                                 *)
+(*                                                                  *)
+(* Field order need not match the struct definition order.          *)
+(* No trailing comma is permitted (consistent with struct_def).     *)
+(*                                                                  *)
+(* LL(1) notes:                                                     *)
+(*   struct_field_list: "}" → ε; IDENT → first field                *)
+(*   FIRST(struct_field) = {IDENT}                                  *)
+(*   FOLLOW(struct_field_list) = {"}"}                              *)
+(*   Disjoint, so no look-ahead conflict.                           *)
+
+struct_lit_body   = "{" , struct_field_list , "}" ;
+
+struct_field_list = [ struct_field , { "," , struct_field } ] ;
+
+struct_field = IDENT , ":" , expr ;
+
+
+(* ================================================================ *)
+(* Argument List                                                    *)
+(* ================================================================ *)
+
+arg_list = [ expr , { "," , expr } ] ;
+
+
+(* ================================================================ *)
+(* No-Struct Expression Hierarchy (expr_ns)                         *)
+(* ================================================================ *)
+(*                                                                  *)
+(* Struct literals create an LL(1) ambiguity in if/while conditions:*)
+(*   if Point { x: 1 } { ... }                                      *)
+(* After "Point", "{" could open a struct literal OR the body block.*)
+(*                                                                  *)
+(* Solution: define expr_ns — identical to expr except              *)
+(* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
+(* Struct literals ARE still allowed when parenthesised:            *)
+(*   if (Point { x: 1 }).flag { ... }                               *)
+(*                                                                  *)
+(* if_stmt and while_stmt use expr_ns for their condition.          *)
+(* All other expression positions use the full expr.                *)
+
+expr_ns = or_expr_ns ;
+
+or_expr_ns  = and_expr_ns , { "or"  , and_expr_ns } ;
+and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
+
+bitor_expr_ns  = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
+bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
+bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
+
+additive_expr_ns = multiplicative_expr_ns ,
+                   { ( "+" | "-" ) , multiplicative_expr_ns } ;
+
+multiplicative_expr_ns = unary_expr_ns ,
+                         { ( "*" | "/" | "%" ) , unary_expr_ns } ;
+
+unary_expr_ns = "!" , unary_expr_ns
+              | "~" , unary_expr_ns
+              | "-" , unary_expr_ns
+              | "*" , unary_expr_ns
+              | "&" , unary_expr_ns
+              | postfix_expr_ns ;
+
+postfix_expr_ns = primary_expr_ns , { postfix_op } ;
+
+(* primary_expr_ns: same as primary_expr but IDENT is never         *)
+(* followed by struct_lit_body. Note "(" , expr , ")" uses full     *)
+(* expr, so struct literals are permitted inside parentheses.       *)
+
+primary_expr_ns = IDENT                          (* bare ident only *)
+                | INT_LIT
+                | FLOAT_LIT
+                | STRING_LIT
+                | CHAR_LIT
+                | "true"
+                | "false"
+                | "(" , expr , ")" ;          (* struct lit OK here *)
+
+
+(* ================================================================ *)
+(* Types                                                            *)
+(* ================================================================ *)
+
+type = primitive_type
+     | named_type
+     | pointer_type
+     | array_type ;
+
+
+(* --- Primitive types ---                                          *)
+(*                                                                  *)
+(* Unsigned integers : u8  u16  u32  u64                            *)
+(* Signed integers   : i8  i16  i32  i64                            *)
+(* Floating-point    : f32  f64                                     *)
+(* Other             : bool  char                                   *)
+
+primitive_type = "u8"  | "u16" | "u32" | "u64"
+               | "i8"  | "i16" | "i32" | "i64"
+               | "f32" | "f64"
+               | "bool" | "char" ;
+
+
+(* --- Named types ---                                              *)
+(*                                                                  *)
+(* A user-defined type referenced by its identifier (e.g. a struct  *)
+(* name). The lexer guarantees that all primitive-type keywords are *)
+(* reserved, so IDENT never clashes with primitive_type.            *)
+
+named_type = IDENT ;
+
+
+(* --- Pointer types ---                                            *)
+(*                                                                  *)
+(* "*" type    — typed pointer; the pointee type is known.          *)
+(* "*opaque"   — untyped/opaque pointer (no pointee type info).     *)
+(*                                                                  *)
+(* LL(1) note: after "*", "opaque" is not in FIRST(type), so the    *)
+(* two alternatives are always distinguishable with one token.      *)
+
+pointer_type = "*" , ( "opaque" | type ) ;
+
+
+(* --- Array types ---                                              *)
+(*                                                                  *)
+(* "[" type ";" INT_LIT "]"                                         *)
+(*                                                                  *)
+(* The element type and the fixed size (a non-negative integer      *)
+(* literal) are separated by ";". Sizes that are constant           *)
+(* expressions may be introduced in a later grammar revision.       *)
+
+array_type = "[" , type , ";" , INT_LIT , "]" ;
+
+
+(* ================================================================ *)
+(* Statements                                                       *)
+(* ================================================================ *)
+
+stmt = let_stmt
+     | return_stmt
+     | if_stmt
+     | while_stmt
+     | loop_stmt
+     | break_stmt
+     | continue_stmt
+     | block_stmt
+     | expr_stmt ;
+
+
+(* --- Return statement ---                                         *)
+(*                                                                  *)
+(* Exits the enclosing function, optionally yielding a value.       *)
+(* "return ;" is used when the function return type is ().          *)
+(*                                                                  *)
+(* LL(1): after "return", peek at next token.                       *)
+(*   ";" → no expression (unit return)                              *)
+(*   other → parse expr, then expect ";"                            *)
+(* ";" is not in FIRST(expr), so the two cases are unambiguous.     *)
+
+return_stmt = "return" , [ expr ] , ";" ;
+
+
+(* --- Expression statement ---                                     *)
+(*                                                                  *)
+(* Evaluates an expression for its side effects; the value is       *)
+(* discarded. The ";" is mandatory.                                 *)
+(*                                                                  *)
+(* LL(1): at stmt level:                                            *)
+(*   "let"      → let_stmt                                          *)
+(*   "return"   → return_stmt                                       *)
+(*   "if"       → if_stmt                                           *)
+(*   "while"    → while_stmt                                        *)
+(*   "loop"     → loop_stmt                                         *)
+(*   "break"    → break_stmt                                        *)
+(*   "continue" → continue_stmt                                     *)
+(*   "{"        → block_stmt                                        *)
+(*   other      → expr_stmt                                         *)
+
+expr_stmt = expr , ";" ;
+
+
+(* --- If statement ---                                             *)
+(*                                                                  *)
+(* Conditionally executes a block. An optional "else" branch may    *)
+(* follow; it is either a plain block or another "if" statement,    *)
+(* enabling "else if" chains of arbitrary length.                   *)
+(*                                                                  *)
+(* LL(1) notes:                                                     *)
+(*   condition uses expr_ns — struct literals are forbidden at the  *)
+(*   outermost level to avoid ambiguity with the body block's "{".  *)
+(*   [ "else" ... ] — consume "else" iff next token is "else"       *)
+(*   else_branch: "if" → if_stmt (else-if); "{" → block_stmt        *)
+(*   The two else_branch alternatives start with distinct tokens,   *)
+(*   so no look-ahead conflict arises (no dangling-else ambiguity). *)
+
+if_stmt     = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
+
+else_branch = if_stmt       (* else if *)
+            | block_stmt ;  (* plain else *)
+
+
+(* --- While loop ---                                               *)
+(*                                                                  *)
+(* Repeatedly executes the body as long as the condition is true.   *)
+(* The condition is re-evaluated before every iteration.            *)
+(* If the condition is false on the first check, the body never     *)
+(* executes.                                                        *)
+(*                                                                  *)
+(* Like if_stmt, the condition uses expr_ns to prevent struct       *)
+(* literal ambiguity with the body block's opening "{".             *)
+
+while_stmt = "while" , expr_ns , block_stmt ;
+
+
+(* --- Infinite loop ---                                            *)
+(*                                                                  *)
+(* Executes the body unconditionally and indefinitely. The only     *)
+(* ways to exit are "break" or "return" inside the body.            *)
+
+loop_stmt = "loop" , block_stmt ;
+
+
+(* --- Break and continue ---                                       *)
+(*                                                                  *)
+(* "break"    exits the immediately enclosing "while" or "loop".    *)
+(* "continue" skips the rest of the current iteration and jumps to  *)
+(*            the next condition check (while) or iteration (loop). *)
+(* Both are only valid inside a loop body; the compiler enforces    *)
+(* this as a semantic rule.                                         *)
+
+break_stmt    = "break" , ";" ;
+continue_stmt = "continue" , ";" ;
+
+
+(* --- Block statement ---                                          *)
+(*                                                                  *)
+(* A block groups zero or more statements into a single statement   *)
+(* and introduces a new lexical scope. It does not produce a value. *)
+(*                                                                  *)
+(* LL(1): at stmt level, "{" unambiguously selects block since no   *)
+(* other stmt alternative starts with "{".                          *)
+
+block_stmt = "{" , { stmt } , "}" ;
+
+
+(* --- Let statement ---                                            *)
+(*                                                                  *)
+(* Introduces a named binding in the current scope.                 *)
+(* Bindings are immutable by default; "mut" opts into mutability.   *)
+(*                                                                  *)
+(* The type annotation and the initialiser are both optional, but   *)
+(* at least one must be present for the binding to be usable;       *)
+(* the compiler enforces this as a semantic (not syntactic) rule.   *)
+(*                                                                  *)
+(* LL(1) notes:                                                     *)
+(*   [ "mut" ]   — consume "mut" iff the next token is "mut"        *)
+(*   [ ":" ... ] — consume iff next token is ":"                    *)
+(*   [ "=" ... ] — consume iff next token is "="                    *)
+(*   All decision tokens are distinct, so no look-ahead conflict.   *)
+
+let_stmt = "let" , [ "mut" ] , IDENT ,
+           [ ":" , type ] ,
+           [ "=" , expr ] ,
+           ";" ;
+
+
+(* ================================================================ *)
+(* Top-Level Definitions                                            *)
+(* ================================================================ *)
+
+(* --- Function definition ---                                      *)
+(*                                                                  *)
+(* Defines a named function with a typed parameter list and an      *)
+(* optional return type. Omitting "->" implies a return type of (). *)
+(*                                                                  *)
+(* LL(1) notes:                                                     *)
+(*   param_list: ")" → ε (empty list); else parse first param       *)
+(*   param: "mut" → consume; IDENT → skip (mut absent)              *)
+(*   [ "->" ... ]: consume iff next token is "->"                   *)
+(*   "->" is a two-character token distinct from "{", the only      *)
+(*   token in FIRST(block_stmt), so no look-ahead conflict arises.  *)
+
+func_def = "fn" , IDENT , "(" , param_list , ")" ,
+           [ "->" , type ] ,
+           block_stmt ;
+
+param_list = [ param , { "," , param } ] ;
+
+(* Each parameter is an optionally-mutable name with a required     *)
+(* type annotation. Mutability applies within the function body.    *)
+
+param = [ "mut" ] , IDENT , ":" , type ;
+
+
+(* --- Struct definition ---                                        *)
+(*                                                                  *)
+(* Defines a named product type with zero or more typed fields.     *)
+(* Fields are separated by commas; no trailing comma is permitted.  *)
+(*                                                                  *)
+(* LL(1) notes:                                                     *)
+(*   field_list: "}" → ε (empty struct); else parse first field     *)
+(*   FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"}             *)
+(*   Disjoint, so no look-ahead conflict.                           *)
+(*   top_level_def: "fn" → func_def; "struct" → struct_def          *)
+
+struct_def = "struct" , IDENT , "{" , field_list , "}" ;
+
+field_list = [ field , { "," , field } ] ;
+
+field = IDENT , ":" , type ;
diff --git a/SYNTAX.md b/SYNTAX.md
new file mode 100644
index 0000000..361d8b0
--- /dev/null
+++ b/SYNTAX.md
@@ -0,0 +1,803 @@
+# Flux Language Syntax Reference
+
+## Lexical Tokens
+
+All tokens listed here are produced by the lexer (lexical analysis phase). Token classes
+(e.g. `IDENT`, `INT_LIT`) appear as UPPERCASE terminals in `GRAMMAR.ebnf`; fixed tokens appear there as quoted literals.
+
+### Literals
+
+| Token        | Description                                                         | Examples                       |
+| ------------ | ------------------------------------------------------------------- | ------------------------------ |
+| `INT_LIT`    | Integer literal (decimal, hex `0x`, octal `0o`, binary `0b`)        | `42`, `0xFF`, `0o77`, `0b1010` |
+| `FLOAT_LIT`  | Floating-point literal                                              | `3.14`, `1.0e-9`, `0.5`        |
+| `STRING_LIT` | Double-quoted UTF-8 string, supports `\n \t \\ \"` escape sequences | `"hello\nworld"`               |
+| `CHAR_LIT`   | Single-quoted Unicode scalar value                                  | `'a'`, `'\n'`, `'\u{1F600}'`   |
+| `TRUE`       | Boolean true literal                                                | `true`                         |
+| `FALSE`      | Boolean false literal                                               | `false`                        |
+
+### Identifier
+
+| Token   | Description                                                                                                  |
+| ------- | ------------------------------------------------------------------------------------------------------------ |
+| `IDENT` | Identifier: starts with a letter or `_`, followed by letters, digits, or `_`. Unicode letters are permitted. |
+
+### Operator Tokens
+
+| Token     | Lexeme | Description                            |
+| --------- | ------ | -------------------------------------- |
+| `PLUS`    | `+`    | Addition / unary plus (not in grammar) |
+| `MINUS`   | `-`    | Subtraction / unary negation           |
+| `STAR`    | `*`    | Multiplication / pointer dereference   |
+| `SLASH`   | `/`    | Division                               |
+| `PERCENT` | `%`    | Modulo (remainder)                     |
+| `AMP`     | `&`    | Bitwise AND / address-of               |
+| `PIPE`    | `\|`   | Bitwise OR                             |
+| `CARET`   | `^`    | Bitwise XOR                            |
+| `BANG`    | `!`    | Logical NOT                            |
+| `TILDE`   | `~`    | Bitwise NOT                            |
+| `DOT`     | `.`    | Member access                          |
+
+### Keyword Tokens
+
+#### Operator Keywords
+
+| Lexeme | Description |
+| ------ | ----------- |
+| `and`  | Logical AND |
+| `or`   | Logical OR  |
+
+#### Boolean Literals
+
+| Lexeme  | Description         |
+| ------- | ------------------- |
+| `true`  | Boolean true value  |
+| `false` | Boolean false value |
+
+#### Primitive Type Keywords
+
+| Lexeme | Description                    |
+| ------ | ------------------------------ |
+| `u8`   | Unsigned 8-bit integer         |
+| `u16`  | Unsigned 16-bit integer        |
+| `u32`  | Unsigned 32-bit integer        |
+| `u64`  | Unsigned 64-bit integer        |
+| `i8`   | Signed 8-bit integer           |
+| `i16`  | Signed 16-bit integer          |
+| `i32`  | Signed 32-bit integer          |
+| `i64`  | Signed 64-bit integer          |
+| `f32`  | 32-bit IEEE 754 floating-point |
+| `f64`  | 64-bit IEEE 754 floating-point |
+| `bool` | Boolean (`true` or `false`)    |
+| `char` | Unicode scalar value (32-bit)  |
+
+#### Pointer Keyword
+
+| Lexeme   | Description                                             |
+| -------- | ------------------------------------------------------- |
+| `opaque` | Used in `*opaque` to denote a pointer with no type info |
+
+#### Statement Keywords
+
+| Lexeme     | Description                           |
+| ---------- | ------------------------------------- |
+| `let`      | Introduces a variable binding         |
+| `mut`      | Marks a binding/parameter as mutable  |
+| `return`   | Exits the enclosing function          |
+| `if`       | Conditional statement                 |
+| `else`     | Alternative branch of an `if`         |
+| `while`    | Condition-controlled loop             |
+| `loop`     | Infinite loop                         |
+| `break`    | Exit the immediately enclosing loop   |
+| `continue` | Skip to the next iteration of a loop  |
+
+#### Definition Keywords
+
+| Lexeme   | Description                      |
+| -------- | -------------------------------- |
+| `fn`     | Introduces a function definition |
+| `struct` | Introduces a struct definition   |
+
+> **Lexer note:** All keywords above are reserved and must be recognised before
+> the general `IDENT` rule. An identifier may not shadow any keyword.
+
+### Delimiter / Punctuation Tokens
+
+| Token       | Lexeme | Description                                            |
+| ----------- | ------ | ------------------------------------------------------ |
+| `LPAREN`    | `(`    | Left parenthesis                                       |
+| `RPAREN`    | `)`    | Right parenthesis                                      |
+| `LBRACKET`  | `[`    | Left square bracket                                    |
+| `RBRACKET`  | `]`    | Right square bracket                                   |
+| `COMMA`     | `,`    | Argument / element separator                           |
+| `SEMICOLON` | `;`    | Statement terminator / array size separator (`[T; N]`) |
+| `LCURLY`    | `{`    | Block / struct body / struct literal open              |
+| `RCURLY`    | `}`    | Block / struct body / struct literal close             |
+| `ARROW`     | `->`   | Function return type separator                         |
+| `COLON`     | `:`    | Type annotation / struct-literal field separator       |
+
+---
+
+## Expressions
+
+Expressions produce a value. The grammar defines them through a hierarchy of
+precedence levels — rows higher in the table below (lower level numbers) have
+lower precedence, i.e. they bind less tightly.
+
+### Operator Precedence Table
+
+| Level | Operators                   | Associativity  | Description                      |
+| ----- | --------------------------- | -------------- | -------------------------------- |
+| 1     | `or`                        | left           | Logical OR (lowest)              |
+| 2     | `and`                       | left           | Logical AND                      |
+| 3     | `\|`                        | left           | Bitwise OR                       |
+| 4     | `^`                         | left           | Bitwise XOR                      |
+| 5     | `&`                         | left           | Bitwise AND                      |
+| 6     | `+`  `-`                    | left           | Addition, subtraction            |
+| 7     | `*`  `/`  `%`               | left           | Multiplication, division, modulo |
+| 8     | `!`  `~`  `-`  `*`  `&`     | right (unary)  | Prefix unary operators           |
+| 9     | `.`  `[…]`  `(…)`           | left (postfix) | Member access, index, call       |
+| 10    | literals, identifiers, `()` | —              | Primary expressions (highest)    |
+
+### Operator Descriptions
+
+#### Binary Operators
+
+| Operator | Name           | Example   | Notes                                        |
+| -------- | -------------- | --------- | -------------------------------------------- |
+| `or`     | Logical OR     | `a or b`  | Short-circuits; both operands must be `bool` |
+| `and`    | Logical AND    | `a and b` | Short-circuits; both operands must be `bool` |
+| `\|`     | Bitwise OR     | `a \| b`  | Integer types                                |
+| `^`      | Bitwise XOR    | `a ^ b`   | Integer types                                |
+| `&`      | Bitwise AND    | `a & b`   | Integer types (binary context)               |
+| `+`      | Addition       | `a + b`   |                                              |
+| `-`      | Subtraction    | `a - b`   |                                              |
+| `*`      | Multiplication | `a * b`   | Binary context (both operands are values)    |
+| `/`      | Division       | `a / b`   | Integer division truncates toward zero       |
+| `%`      | Modulo         | `a % b`   | Sign follows the dividend                    |
+
+#### Unary Prefix Operators
+
+| Operator | Name        | Example | Notes                                            |
+| -------- | ----------- | ------- | ------------------------------------------------ |
+| `!`      | Logical NOT | `!cond` | Operand must be `bool`                           |
+| `~`      | Bitwise NOT | `~mask` | Bitwise complement; integer types                |
+| `-`      | Negation    | `-x`    | Arithmetic negation                              |
+| `*`      | Dereference | `*ptr`  | Unary context; operand must be a pointer type    |
+| `&`      | Address-of  | `&x`    | Unary context; produces a pointer to the operand |
+
+#### Postfix Operators
+
+| Operator | Name          | Example     | Notes                                             |
+| -------- | ------------- | ----------- | ------------------------------------------------- |
+| `.`      | Member access | `obj.field` | Accesses a named field or method of a struct/type |
+| `[…]`    | Subscript     | `arr[i]`    | Indexes into an array, slice, or map              |
+| `(…)`    | Call          | `f(a, b)`   | Invokes a function or closure                     |
+
+> **Disambiguation:** `*` and `&` are context-sensitive.
+> When appearing as the first token of a `unary_expr` they are **unary**
+> (dereference / address-of). When appearing between two operands inside
+> `multiplicative_expr` (`*`) or `bitand_expr` (`&`) they are **binary**
+> (multiplication / bitwise AND). The parser resolves this purely from
+> grammatical position — no look-ahead beyond 1 token is required.
+
+### Parenthesised Expressions
+
+Any expression may be wrapped in parentheses to override default precedence:
+
+```
+(a + b) * c
+```
+
+### Function Call Argument List
+
+Arguments are comma-separated expressions. A trailing comma is **not**
+permitted at this grammar level.
+
+```
+f()
+f(x)
+f(x, y, z)
+```
+
+### Examples
+
+```flux
+// Arithmetic
+a + b * c - d % 2
+
+// Bitwise
+flags & MASK | extra ^ toggle
+
+// Logical
+ready and not_done or fallback
+
+// Mixed unary / postfix
+*ptr.field
+&arr[i]
+!cond
+
+// Chained postfix
+obj.method(arg1, arg2)[0].name
+
+// Explicit precedence override
+(a or b) and c
+```
+
+---
+
+## Types
+
+Types describe the shape and interpretation of values. All type positions in
+the grammar reference the `type` non-terminal.
+
+### Primitive Types
+
+Primitive types are single-keyword types built into the language.
+
+| Type   | Kind             | Width  | Range / Notes                              |
+| ------ | ---------------- | ------ | ------------------------------------------ |
+| `u8`   | Unsigned integer | 8-bit  | 0 … 255                                    |
+| `u16`  | Unsigned integer | 16-bit | 0 … 65 535                                 |
+| `u32`  | Unsigned integer | 32-bit | 0 … 4 294 967 295                          |
+| `u64`  | Unsigned integer | 64-bit | 0 … 2⁶⁴ − 1                                |
+| `i8`   | Signed integer   | 8-bit  | −128 … 127                                 |
+| `i16`  | Signed integer   | 16-bit | −32 768 … 32 767                           |
+| `i32`  | Signed integer   | 32-bit | −2 147 483 648 … 2 147 483 647             |
+| `i64`  | Signed integer   | 64-bit | −2⁶³ … 2⁶³ − 1                             |
+| `f32`  | Floating-point   | 32-bit | IEEE 754 single precision                  |
+| `f64`  | Floating-point   | 64-bit | IEEE 754 double precision                  |
+| `bool` | Boolean          | 1 byte | `true` or `false`                          |
+| `char` | Unicode scalar   | 32-bit | Any Unicode scalar value (not a surrogate) |
+
+### Named Types
+
+A named type is any user-defined type referenced by its identifier — typically a struct name. Because all primitive-type keywords (`u8`, `bool`, etc.) are reserved, an `IDENT` in type position is always a named type, never a primitive.
+
+```flux
+Point        // struct Point { x: f32, y: f32 }
+Node         // struct Node { value: i64, next: *Node }
+*Point       // pointer to a named type
+[Node; 8]    // array of a named type
+```
+
+### Pointer Types
+
+A pointer type is written with a leading `*`.
+
+| Syntax    | Description                                                                           |
+| --------- | ------------------------------------------------------------------------------------- |
+| `*T`      | Typed pointer — points to a value of type `T`                                         |
+| `*opaque` | Opaque pointer — no compile-time pointee type information; equivalent to C's `void *` |
+
+Pointer types may be nested: `**u8` is a pointer to a pointer to `u8`.
+
+```flux
+*u8          // pointer to u8
+**i32        // pointer to pointer to i32
+*opaque      // untyped pointer
+**opaque     // pointer to untyped pointer
+```
+
+### Array Types
+
+Arrays have a fixed size known at compile time.
+
+```
+[ <element-type> ; <size> ]
+```
+
+`<size>` must be a non-negative integer literal (`INT_LIT`). The element type
+may itself be any `type`, including pointers or nested arrays.
+
+```flux
+[u8; 256]          // array of 256 u8 values
+[*u8; 4]           // array of 4 pointers to u8
+[[f32; 3]; 3]      // 3×3 matrix of f32 (array of arrays)
+[*opaque; 8]       // array of 8 opaque pointers
+```
+
+### Type Grammar Summary
+
+```ebnf
+type           = primitive_type | named_type | pointer_type | array_type ;
+primitive_type = "u8" | "u16" | "u32" | "u64"
+               | "i8" | "i16" | "i32" | "i64"
+               | "f32" | "f64" | "bool" | "char" ;
+named_type     = IDENT ;
+pointer_type   = "*" , ( "opaque" | type ) ;
+array_type     = "[" , type , ";" , INT_LIT , "]" ;
+```
+
+---
+
+## Struct Literals
+
+A struct literal constructs a value of a named struct type by providing values for each field.
+
+```
+<TypeName> { <field>: <expr>, ... }
+```
+
+Fields may appear in any order and need not match the declaration order. No trailing comma is permitted.
+
+### Examples
+
+```flux
+let p = Point { x: 1.0, y: 2.0 };
+
+let n = Node {
+    value: 42,
+    next: get_next()
+};
+
+// Nested struct literal
+let outer = Rect {
+    origin: Point { x: 0.0, y: 0.0 },
+    size: Point { x: 10.0, y: 5.0 }
+};
+
+// Empty struct
+let u = Unit {};
+```
+
+### Struct Literals in Conditions
+
+Struct literals are **not permitted** as the outermost expression in `if` and `while` conditions. This restriction exists because `{` after the condition is ambiguous — it could start a struct literal body or the statement block.
+
+```flux
+// ERROR — ambiguous: is `{` a struct body or the if block?
+if Flags { verbose: true } { ... }
+
+// OK — parentheses resolve the ambiguity
+if (Flags { verbose: true }).verbose { ... }
+```
+
+The grammar enforces this through the `expr_ns` (no-struct) hierarchy used in condition positions. Struct literals remain valid everywhere else: `let`, `return`, function arguments, field values, etc.
+
+### Struct Literal Grammar Summary
+
+```ebnf
+primary_expr      = IDENT , [ struct_lit_body ] | INT_LIT | FLOAT_LIT
+                  | STRING_LIT | CHAR_LIT | "true" | "false"
+                  | "(" , expr , ")" ;
+struct_lit_body   = "{" , struct_field_list , "}" ;
+struct_field_list = [ struct_field , { "," , struct_field } ] ;
+struct_field      = IDENT , ":" , expr ;
+```
+
+### No-Struct Expression (`expr_ns`)
+
+`expr_ns` is a parallel expression hierarchy identical to `expr` except its primary level (`primary_expr_ns`) does not allow the `struct_lit_body` suffix after an `IDENT`. Struct literals are still permitted when enclosed in parentheses (`"(" , expr , ")"`), because the `(` unambiguously marks the start of a grouped expression.
+
+`if_stmt` and `while_stmt` use `expr_ns` for their condition; all other expression positions use the full `expr`.
+
+---
+
+## Statements
+
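+Statements perform an action and do not produce a value. Simple statements
+(`let`, `return`, `break`, `continue`, and expression statements) are
+terminated by a semicolon `;`; statements that end in a block (`if`, `while`,
+`loop`, and bare blocks) are not.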
+
+### Let Statement
+
+Introduces a new named binding in the current scope.
+
+```
+let [mut] <name> [: <type>] [= <expr>] ;
+```
+
+| Part       | Required | Description                                   |
+| ---------- | -------- | --------------------------------------------- |
+| `mut`      | no       | Makes the binding mutable; omit for immutable |
+| `<name>`   | yes      | The identifier being bound                    |
+| `: <type>` | no       | Explicit type annotation                      |
+| `= <expr>` | no       | Initialiser expression                        |
+| `;`        | yes      | Statement terminator                          |
+
+Bindings are **immutable by default**. Attempting to assign to a binding
+declared without `mut` is a compile-time error.
+
+At least one of the type annotation or the initialiser must be present so the
+compiler can determine the binding's type. This is a semantic constraint, not a
+syntactic one — the grammar permits bare `let x;` and the type checker rejects
+it if no type can be inferred from context.
+
+#### Examples
+
+```flux
+// Immutable, type inferred from initialiser
+let x = 42;
+
+// Immutable, explicit type
+let y: f64 = 3.14;
+
+// Mutable, type inferred
+let mut count = 0;
+
+// Mutable, explicit type, no initialiser (must be assigned before use)
+let mut buf: [u8; 128];
+
+// Mutable pointer to u32
+let mut ptr: *u32 = &value;
+
+// Shadowing a previous binding is allowed
+let x = "hello";   // x is now a string, previous x is gone
+```
+
+### Return Statement
+
+Exits the enclosing function immediately, optionally producing a return value.
+
+```
+return [<expr>] ;
+```
+
+`return;` (no expression) is used when the function's return type is the unit
+type `()`. `return <expr>;` returns the value of the expression.
+
+Explicit `return` is only needed for early exits. The idiomatic way to return a
+value from a function is the implicit return of its body block.
+
+```flux
+return;               // unit return
+return 42;            // return an integer
+return x * 2 + 1;    // return an expression
+```
+
+### Expression Statement
+
+Evaluates an expression for its side effects; the resulting value is
+discarded. A semicolon is required.
+
+```
+<expr> ;
+```
+
+```flux
+do_something(x);    // call for side effects
+count + 1;          // legal but silly — value discarded
+```
+
+### Statement Grammar Summary
+
+```ebnf
+stmt          = let_stmt | return_stmt | if_stmt
+              | while_stmt | loop_stmt | break_stmt | continue_stmt
+              | block_stmt | expr_stmt ;
+let_stmt      = "let" , [ "mut" ] , IDENT , [ ":" , type ] , [ "=" , expr ] , ";" ;
+return_stmt   = "return" , [ expr ] , ";" ;
+if_stmt       = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
+else_branch   = if_stmt | block_stmt ;
+while_stmt    = "while" , expr_ns , block_stmt ;
+loop_stmt     = "loop" , block_stmt ;
+break_stmt    = "break" , ";" ;
+continue_stmt = "continue" , ";" ;
+block_stmt    = "{" , { stmt } , "}" ;
+expr_stmt     = expr , ";" ;
+```
+
+---
+
+## If Statement
+
+Conditionally executes a block based on a boolean expression.
+
+```
+if <cond> <block> [else <else-branch>]
+```
+
+The condition `<cond>` must be an expression of type `bool`. The body is
+always a `block_stmt` — braces are mandatory.
+
+### Else Branch
+
+The optional `else` branch is either a plain block or another `if` statement,
+enabling `else if` chains of arbitrary length.
+
+```flux
+if x > 0 {
+    pos();
+}
+
+if x > 0 {
+    pos();
+} else {
+    non_pos();
+}
+
+if x > 0 {
+    pos();
+} else if x < 0 {
+    neg();
+} else {
+    zero();
+}
+```
+
+### If Statement Grammar Summary
+
+```ebnf
+if_stmt     = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
+else_branch = if_stmt | block_stmt ;
+```
+
+---
+
+## While Loop
+
+Repeatedly executes a block as long as a boolean condition holds. The
+condition is tested before each iteration; if it is false on entry, the body
+never runs.
+
+```
+while <cond> <block>
+```
+
+```flux
+let mut i = 0;
+while i < 10 {
+    process(i);
+    i = i + 1;
+}
+```
+
+### While Loop Grammar Summary
+
+```ebnf
+while_stmt = "while" , expr_ns , block_stmt ;
+```
+
+---
+
+## Loop
+
+Executes a block unconditionally and indefinitely. The loop runs until a
+`break` or `return` inside the body transfers control out.
+
+```
+loop <block>
+```
+
+```flux
+loop {
+    let msg = recv();
+    if msg.is_quit() {
+        break;
+    }
+    handle(msg);
+}
+```
+
+### Loop Grammar Summary
+
+```ebnf
+loop_stmt = "loop" , block_stmt ;
+```
+
+---
+
+## Break and Continue
+
+`break` and `continue` are only valid inside the body of a `while` or `loop`.
+The compiler enforces this as a semantic rule.
+
+| Statement    | Effect                                                         |
+| ------------ | -------------------------------------------------------------- |
+| `break ;`    | Exits the innermost enclosing loop immediately                 |
+| `continue ;` | Skips the rest of the current iteration; jumps to the next one |
+
+For `while`, `continue` jumps back to the condition check. For `loop`,
+`continue` jumps back to the top of the body.
+
+```flux
+let mut i = 0;
+while i < 20 {
+    i = i + 1;
+    if i % 2 == 0 {
+        continue;   // skip even numbers
+    }
+    if i > 15 {
+        break;      // stop after 15
+    }
+    process(i);
+}
+```
+
+### Break / Continue Grammar Summary
+
+```ebnf
+break_stmt    = "break" , ";" ;
+continue_stmt = "continue" , ";" ;
+```
+
+---
+
+## Block Statement
+
+A block groups zero or more statements into a single statement and introduces
+a new lexical scope. Blocks do not produce a value.
+
+```
+{ <stmt>* }
+```
+
+### Scoping
+
+Bindings declared inside a block are not visible outside it. A binding in an
+inner scope may shadow a name from an outer scope without affecting it.
+
+```flux
+let x = 1;
+{
+    let x = 2;   // shadows outer x inside this block only
+    f(x);        // uses 2
+}
+// x is still 1 here
+```
+
+### Nesting
+
+Blocks may be nested freely to any depth.
+
+```flux
+{
+    let a = compute_a();
+    {
+        let b = compute_b();
+        use(a, b);
+    }
+    // b is no longer in scope here
+}
+```
+
+### Block Grammar Summary
+
+```ebnf
+block_stmt = "{" , { stmt } , "}" ;
+```
+
+---
+
+## Top-Level Definitions
+
+A Flux source file is a sequence of top-level definitions.
+
+```ebnf
+program       = { top_level_def } ;
+top_level_def = func_def | struct_def ;
+```
+
+The leading token unambiguously selects the definition kind: `fn` → function,
+`struct` → struct.
+
+---
+
+## Function Definition
+
+Defines a named, callable function.
+
+```
+fn <name> ( [<params>] ) [-> <return-type>] <block>
+```
+
+| Part               | Required | Description                                              |
+| ------------------ | -------- | -------------------------------------------------------- |
+| `<name>`           | yes      | The function's identifier                                |
+| `( [<params>] )`   | yes      | Comma-separated parameter list, may be empty             |
+| `-> <return-type>` | no       | Return type; omitting it means the function returns `()` |
+| `<block>`          | yes      | Function body — a `block_stmt`                           |
+
+### Parameters
+
+Each parameter is a name with a mandatory type annotation. Parameters are
+immutable by default; `mut` makes the local binding mutable within the body.
+
+```
+[mut] <name> : <type>
+```
+
+```flux
+fn add(a: i32, b: i32) -> i32 {
+    return a + b;
+}
+
+fn greet(name: *u8) {
+    print(name);
+}
+
+fn increment(mut x: i32) -> i32 {
+    x = x + 1;
+    return x;
+}
+
+fn apply(f: *opaque, mut buf: [u8; 64]) -> bool {
+    return call(f, &buf);
+}
+```
+
+### Return Type
+
+If `->` is omitted the return type is implicitly `()` (the unit type). An
+explicit `-> ()` is also permitted but redundant.
+
+```flux
+fn do_work() {          // returns ()
+    side_effect();
+}
+
+fn get_value() -> i64 { // returns i64
+    return 42;
+}
+```
+
+### Function Definition Grammar Summary
+
+```ebnf
+func_def   = "fn" , IDENT , "(" , param_list , ")" , [ "->" , type ] , block_stmt ;
+param_list = [ param , { "," , param } ] ;
+param      = [ "mut" ] , IDENT , ":" , type ;
+```
+
+---
+
+## Struct Definition
+
+Defines a named product type with zero or more typed fields.
+
+```
+struct <name> {
+    <field>: <type>,
+    ...
+}
+```
+
+Fields are separated by commas. No trailing comma is permitted. An empty
+struct (zero fields) is valid.
+
+### Fields
+
+Each field is a name and a type. Fields may be of any type including pointers,
+arrays, and other structs. Field names must be unique within the struct.
+
+```flux
+struct Point {
+    x: f32,
+    y: f32
+}
+
+struct Node {
+    value: i64,
+    next: *Node
+}
+
+struct Buffer {
+    data: *u8,
+    len: u64,
+    cap: u64
+}
+
+struct Unit {}
+```
+
+### Member Access
+
+Fields of a struct value are accessed with the `.` operator (defined in the
+expression grammar). If the value is behind a pointer, dereference it first
+with `*`.
+
+```flux
+let p: Point = make_point();
+let x = p.x;
+
+let ptr: *Point = get_point_ptr();
+let y = (*ptr).y;
+```
+
+### Struct Definition Grammar Summary
+
+```ebnf
+struct_def = "struct" , IDENT , "{" , field_list , "}" ;
+field_list = [ field , { "," , field } ] ;
+field      = IDENT , ":" , type ;
+```
diff --git a/examples/fibonacci.flx b/examples/fibonacci.flx
new file mode 100644
index 0000000..308a75b
--- /dev/null
+++ b/examples/fibonacci.flx
@@ -0,0 +1,7 @@
+// Naive recursive Fibonacci: yields n itself for n < 2, otherwise the sum of
+// the results for n - 1 and n - 2.
+// NOTE(review): `return n;` hands back the u8 parameter where u64 is declared —
+// presumably relies on implicit widening; confirm against the type rules.
+fn fibonacci(n: u8) -> u64 {
+    if n < 2 {
+        return n;
+    }
+
+    return fibonacci(n - 1) + fibonacci(n - 2);
+}
\ No newline at end of file
diff --git a/ll1_check.py b/ll1_check.py
new file mode 100644
index 0000000..507e409
--- /dev/null
+++ b/ll1_check.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+"""
+ll1_check.py — Parse GRAMMAR.ebnf and verify the LL(1) property.
+
+Usage:  python ll1_check.py [grammar_file]  [-v]
+
+Algorithm
+---------
+1. Strip (* … *) comments; tokenise.
+2. Parse ISO/IEC 14977 EBNF into an AST.
+3. Normalise to plain BNF by introducing fresh helper non-terminals:
+     { body }  →  _repN   where  _repN = body , _repN | ε
+     [ body ]  →  _optN   where  _optN = body | ε
+     ( body )  →  _grpN   where  _grpN = alt1 | alt2 | …   when the group is an
+                  alternation inside a sequence; any other group is inlined
+4. Compute FIRST and FOLLOW sets (fixed-point iteration).
+5. For each non-terminal compute PREDICT sets; flag pairwise conflicts.
+"""
+
+import re
+import sys
+from collections import defaultdict
+from itertools import count as _count
+from pathlib import Path
+
+# Marker placed in FIRST sets to record that a symbol or sequence can derive
+# the empty string; also used as the printed form of an empty production.
+EPSILON = 'ε'
+START   = 'program'          # grammar start symbol
+
+# ═══════════════════════════════════════════════════════════════ 1. Tokenise
+
+_TOK = re.compile(
+    r'"(?:[^"\\]|\\.)*"'    # "quoted terminal string"
+    r'|[A-Z][A-Z0-9_]*'    # UPPERCASE token class  (terminal)
+    r'|[a-z][a-z0-9_]*'    # lowercase identifier   (non-terminal)
+    r'|[=;,|()\[\]{}]'     # single-char punctuation
+)
+
+def tokenise(src: str) -> list:
+    src = re.sub(r'\(\*.*?\*\)', ' ', src, flags=re.DOTALL)
+    return _TOK.findall(src)
+
+
+# ═══════════════════════════════════════════════════════════════ 2. Parse EBNF → AST
+#
+# Each AST node is a tuple:
+#   ('lit', s)       terminal  — quoted string "…" or UPPERCASE token class
+#   ('nt',  s)       non-terminal reference
+#   ('seq', [...])   concatenation  (A , B , C)
+#   ('alt', [...])   alternation    (A | B | C)
+#   ('opt', node)    optional       [ … ]
+#   ('rep', node)    repetition     { … }
+
+class _Parser:
+    def __init__(self, tokens):
+        self._t = tokens
+        self._i = 0
+
+    def _peek(self):
+        return self._t[self._i] if self._i < len(self._t) else None
+
+    def _eat(self, expected=None):
+        v = self._t[self._i]; self._i += 1
+        if expected and v != expected:
+            raise SyntaxError(f'expected {expected!r}, got {v!r} '
+                              f'(token #{self._i - 1})')
+        return v
+
+    def parse_grammar(self) -> dict:
+        rules = {}
+        while self._i < len(self._t):
+            name = self._eat()
+            self._eat('=')
+            rules[name] = self._body()
+            self._eat(';')
+        return rules
+
+    def _body(self):
+        alts = [self._seq()]
+        while self._peek() == '|':
+            self._eat()
+            alts.append(self._seq())
+        return alts[0] if len(alts) == 1 else ('alt', alts)
+
+    def _seq(self):
+        items = [self._atom()]
+        while self._peek() == ',':
+            self._eat()
+            items.append(self._atom())
+        return items[0] if len(items) == 1 else ('seq', items)
+
+    def _atom(self):
+        t = self._peek()
+        if t == '[':
+            self._eat(); b = self._body(); self._eat(']')
+            return ('opt', b)
+        if t == '{':
+            self._eat(); b = self._body(); self._eat('}')
+            return ('rep', b)
+        if t == '(':
+            self._eat(); b = self._body(); self._eat(')')
+            return b                         # group — return inner node directly
+        if t and (t[0] == '"' or t[0].isupper()):
+            return ('lit', self._eat())
+        if t and t[0].islower():
+            return ('nt',  self._eat())
+        raise SyntaxError(f'unexpected token {t!r}')
+
+
+# ═══════════════════════════════════════════════════════════════ 3. Normalise
+
+def normalise(ebnf: dict) -> tuple:
+    """
+    Convert EBNF AST to plain BNF.
+
+    Returns
+    -------
+    bnf     : dict[name → list[list[str]]]
+                Each inner list is one production; [] = ε production.
+    origins : dict[helper_name → parent_rule_name]
+                Maps generated helper names back to the rule that created them.
+    """
+    bnf:     dict = {}
+    origins: dict = {}
+    ctr = _count()
+
+    def fresh(tag: str, rule: str) -> str:
+        h = f'_{tag}{next(ctr)}'
+        origins[h] = rule
+        return h
+
+    def expand(node, rule: str, in_seq: bool = False) -> list:
+        """
+        Return a list of alternative symbol sequences for this AST node.
+
+        in_seq: when True, an 'alt' node is wrapped in a fresh non-terminal
+                instead of being inlined.  This prevents the cross-product
+                expansion of  A , (B | C) , D  from producing two productions
+                that both start with A — a common-prefix false positive that
+                would be misreported as an LL(1) conflict.  The grammar is
+                already left-factored at the EBNF level; this preserves that.
+        """
+        tag = node[0]
+
+        if tag == 'lit':
+            return [[node[1]]]
+
+        if tag == 'nt':
+            return [[node[1]]]
+
+        if tag == 'seq':
+            # Children of a seq are expanded with in_seq=True so that any
+            # alt node inside the sequence becomes a fresh non-terminal.
+            result = [[]]
+            for child in node[1]:
+                child_seqs = expand(child, rule, in_seq=True)
+                result = [a + b for a in result for b in child_seqs]
+            return result
+
+        if tag == 'alt':
+            if in_seq:
+                # Alt inside a seq: wrap in a fresh non-terminal (_grpN).
+                # Each alternative is expanded at top-level (in_seq=False).
+                h = fresh('grp', rule)
+                bnf[h] = [s for child in node[1]
+                          for s in expand(child, rule, in_seq=False)]
+                return [[h]]
+            # Alt at the top level of a rule body: return alternatives directly.
+            return [s for child in node[1]
+                    for s in expand(child, rule, in_seq=False)]
+
+        if tag == 'opt':
+            # [ body ]  →  _optN = body | ε
+            h = fresh('opt', rule)
+            bnf[h] = expand(node[1], rule) + [[]]
+            return [[h]]
+
+        if tag == 'rep':
+            # { body }  →  _repN = body , _repN | ε
+            h = fresh('rep', rule)
+            body_seqs = expand(node[1], rule)
+            bnf[h] = [s + [h] for s in body_seqs] + [[]]
+            return [[h]]
+
+        raise ValueError(f'unknown AST tag {tag!r}')
+
+    for name, node in ebnf.items():
+        bnf[name] = expand(node, name)
+
+    return bnf, origins
+
+
+# ═══════════════════════════════════════════════════════════════ 4. FIRST / FOLLOW
+
+def first_of_seq(seq: list, first: dict, bnf: dict) -> set:
+    """
+    FIRST set of a sequence of grammar symbols.
+    Returns a set of terminal strings; includes EPSILON if the whole
+    sequence can derive the empty string.
+    """
+    result = set()
+    for sym in seq:
+        if sym not in bnf:              # terminal symbol
+            result.add(sym)
+            return result               # terminals never derive ε
+        sym_first = first[sym]
+        result |= sym_first - {EPSILON}
+        if EPSILON not in sym_first:
+            return result               # this symbol is not nullable — stop
+    result.add(EPSILON)                 # every symbol in seq was nullable
+    return result
+
+
+def compute_first(bnf: dict) -> dict:
+    first = defaultdict(set)
+    changed = True
+    while changed:
+        changed = False
+        for name, prods in bnf.items():
+            for prod in prods:
+                new = first_of_seq(prod, first, bnf)
+                if not new <= first[name]:
+                    first[name] |= new
+                    changed = True
+    return first
+
+
+def compute_follow(bnf: dict, first: dict, start: str) -> dict:
+    follow = defaultdict(set)
+    follow[start].add('$')
+    changed = True
+    while changed:
+        changed = False
+        for name, prods in bnf.items():
+            for prod in prods:
+                for i, sym in enumerate(prod):
+                    if sym not in bnf:
+                        continue                    # skip terminals
+                    # FIRST of what comes after sym in this production
+                    rest_first = first_of_seq(prod[i + 1:], first, bnf)
+                    before = len(follow[sym])
+                    follow[sym] |= rest_first - {EPSILON}
+                    if EPSILON in rest_first:
+                        follow[sym] |= follow[name]
+                    if len(follow[sym]) > before:
+                        changed = True
+    return follow
+
+
+# ═══════════════════════════════════════════════════════════════ 5. LL(1) check
+
+def predict_set(prod: list, name: str, first: dict, follow: dict, bnf: dict) -> set:
+    """
+    PREDICT(A → prod) = (FIRST(prod) − {ε})  ∪  (FOLLOW(A) if ε ∈ FIRST(prod))
+    """
+    f = first_of_seq(prod, first, bnf)
+    p = f - {EPSILON}
+    if EPSILON in f:
+        p |= follow[name]
+    return p
+
+
+def check_ll1(bnf: dict, first: dict, follow: dict) -> list:
+    """
+    For each non-terminal check that all PREDICT sets are pairwise disjoint.
+    Returns a list of conflict dicts.
+    """
+    errors = []
+    for name, prods in bnf.items():
+        sets = [predict_set(p, name, first, follow, bnf) for p in prods]
+        for i in range(len(sets)):
+            for j in range(i + 1, len(sets)):
+                conflict = sets[i] & sets[j]
+                if conflict:
+                    errors.append({
+                        'rule':     name,
+                        'prod_i':   prods[i],
+                        'prod_j':   prods[j],
+                        'conflict': sorted(conflict),
+                    })
+    return errors
+
+
+# ═══════════════════════════════════════════════════════════════ 6. Main
+
+def _fmt_prod(prod: list) -> str:
+    return ' '.join(prod) if prod else EPSILON
+
+
+def main():
+    argv      = sys.argv[1:]
+    verbose   = '-v' in argv
+    positional = [a for a in argv if not a.startswith('-')]
+    path      = Path(positional[0]) if positional else Path('GRAMMAR.ebnf')
+
+    # ── Load & parse ──────────────────────────────────────────────────────
+    print(f'Checking {path} …')
+    try:
+        src = path.read_text(encoding='utf-8')
+    except FileNotFoundError:
+        sys.exit(f'error: file not found: {path}')
+
+    toks = tokenise(src)
+    try:
+        ebnf = _Parser(toks).parse_grammar()
+    except SyntaxError as exc:
+        sys.exit(f'EBNF parse error: {exc}')
+
+    bnf, origins = normalise(ebnf)
+    first  = compute_first(bnf)
+    follow = compute_follow(bnf, first, START)
+    errors = check_ll1(bnf, first, follow)
+
+    # ── Summary line ──────────────────────────────────────────────────────
+    named   = sorted(n for n in bnf if not n.startswith('_'))
+    helpers = sorted(n for n in bnf if     n.startswith('_'))
+    print(f'  {len(named)} named rules, {len(helpers)} generated helper rules\n')
+
+    # ── Optional verbose output ───────────────────────────────────────────
+    if verbose:
+        col = max((len(n) for n in named), default=0) + 2
+        print('── FIRST sets (named rules) ──────────────────────────────')
+        for n in named:
+            syms     = sorted(first[n] - {EPSILON})
+            nullable = '  [nullable]' if EPSILON in first[n] else ''
+            print(f'  FIRST({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}{nullable}')
+        print()
+        print('── FOLLOW sets (named rules) ─────────────────────────────')
+        for n in named:
+            syms = sorted(follow[n])
+            print(f'  FOLLOW({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}')
+        print()
+
+    # ── LL(1) result ──────────────────────────────────────────────────────
+    named_err  = [e for e in errors if not e['rule'].startswith('_')]
+    helper_err = [e for e in errors if     e['rule'].startswith('_')]
+
+    if not errors:
+        print('✓  Grammar is LL(1) — no conflicts detected.')
+        return
+
+    print(f'✗  {len(errors)} conflict(s): '
+          f'{len(named_err)} in named rules, '
+          f'{len(helper_err)} in generated helpers\n')
+
+    for e in named_err:
+        print(f'  Rule [{e["rule"]}]')
+        print(f'    alt A : {_fmt_prod(e["prod_i"])}')
+        print(f'    alt B : {_fmt_prod(e["prod_j"])}')
+        print(f'    ambiguous token(s): {e["conflict"]}\n')
+
+    if helper_err:
+        print('  Conflicts in generated helpers '
+              '(each is linked back to its enclosing named rule):')
+        for e in helper_err:
+            orig = origins.get(e['rule'], '?')
+            print(f'  [{e["rule"]}]  ← from rule [{orig}]')
+            print(f'    alt A : {_fmt_prod(e["prod_i"])}')
+            print(f'    alt B : {_fmt_prod(e["prod_j"])}')
+            print(f'    ambiguous token(s): {e["conflict"]}\n')
+
+
+if __name__ == '__main__':
+    main()