Initial Flux language specification
Add the LL(1) context-free grammar (GRAMMAR.ebnf), token and syntax reference (SYNTAX.md), LL(1) verification tool (ll1_check.py), and a fibonacci example demonstrating the language.
This commit is contained in:
435
GRAMMAR.ebnf
Normal file
435
GRAMMAR.ebnf
Normal file
@@ -0,0 +1,435 @@
|
|||||||
|
(* Flux Language Grammar — Context-Free LL(1) Grammar *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* *)
|
||||||
|
(* Notation (ISO/IEC 14977 EBNF): *)
|
||||||
|
(* rule = definition ; defines a rule (terminated by ;) *)
|
||||||
|
(* a , b concatenation *)
|
||||||
|
(* a | b alternation *)
|
||||||
|
(* { a } zero or more repetitions of a *)
|
||||||
|
(* [ a ] optional a (zero or one) *)
|
||||||
|
(* ( a | b ) grouping *)
|
||||||
|
(* "literal" terminal string *)
|
||||||
|
(* *)
|
||||||
|
(* UPPERCASE identifiers are lexical token classes whose value *)
|
||||||
|
(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT). *)
|
||||||
|
(* They are NOT defined here — see SYNTAX.md. *)
|
||||||
|
(* *)
|
||||||
|
(* Unique/fixed tokens are written as quoted literals directly. *)
|
||||||
|
(* *)
|
||||||
|
(* Lowercase identifiers are non-terminals (grammar productions). *)
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Program (start symbol) *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
program = { top_level_def } ;
|
||||||
|
|
||||||
|
top_level_def = func_def
|
||||||
|
| struct_def ;
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Expressions *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
expr = or_expr ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Logical OR (lowest-precedence binary operator) --- *)
|
||||||
|
(* *)
|
||||||
|
(* Uses keyword `or`; left-associative via iteration. *)
|
||||||
|
|
||||||
|
or_expr = and_expr , { "or" , and_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Logical AND --- *)
|
||||||
|
(* *)
|
||||||
|
(* Uses keyword `and`; left-associative via iteration. *)
|
||||||
|
|
||||||
|
and_expr = bitor_expr , { "and" , bitor_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Bitwise OR --- *)
|
||||||
|
|
||||||
|
bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Bitwise XOR --- *)
|
||||||
|
|
||||||
|
bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Bitwise AND --- *)
|
||||||
|
|
||||||
|
bitand_expr = additive_expr , { "&" , additive_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Additive: addition and subtraction --- *)
|
||||||
|
|
||||||
|
additive_expr = multiplicative_expr ,
|
||||||
|
{ ( "+" | "-" ) , multiplicative_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Multiplicative: multiplication, division, modulo --- *)
|
||||||
|
|
||||||
|
multiplicative_expr = unary_expr ,
|
||||||
|
{ ( "*" | "/" | "%" ) , unary_expr } ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Unary operators (prefix, right-associative by recursion) --- *)
|
||||||
|
(* *)
|
||||||
|
(* "!" logical not *)
|
||||||
|
(* "~" bitwise not *)
|
||||||
|
(* "-" arithmetic negation *)
|
||||||
|
(* "*" dereference (pointer indirection) *)
|
||||||
|
(* "&" address-of *)
|
||||||
|
|
||||||
|
unary_expr = "!" , unary_expr
|
||||||
|
| "~" , unary_expr
|
||||||
|
| "-" , unary_expr
|
||||||
|
| "*" , unary_expr
|
||||||
|
| "&" , unary_expr
|
||||||
|
| postfix_expr ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Postfix operators (left-associative via iteration) --- *)
|
||||||
|
(* *)
|
||||||
|
(* Postfix operators bind tighter than any prefix or binary form. *)
|
||||||
|
(* Multiple postfix operations chain left-to-right. *)
|
||||||
|
|
||||||
|
postfix_expr = primary_expr , { postfix_op } ;
|
||||||
|
|
||||||
|
postfix_op = "." , IDENT (* member access *)
|
||||||
|
| "[" , expr , "]" (* subscript/index *)
|
||||||
|
| "(" , arg_list , ")" ; (* function call *)
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Primary expressions (highest precedence) --- *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) note: after IDENT, peek at the next token. *)
|
||||||
|
(* "{" → parse struct_lit_body (struct literal) *)
|
||||||
|
(* other → bare identifier reference *)
|
||||||
|
|
||||||
|
primary_expr = IDENT , [ struct_lit_body ] (* ident or struct lit *)
|
||||||
|
| INT_LIT
|
||||||
|
| FLOAT_LIT
|
||||||
|
| STRING_LIT
|
||||||
|
| CHAR_LIT
|
||||||
|
| "true"
|
||||||
|
| "false"
|
||||||
|
| "(" , expr , ")" ; (* parenthesised *)
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Struct literal --- *)
|
||||||
|
(* *)
|
||||||
|
(* A struct literal constructs a value of a named struct type. *)
|
||||||
|
(* IDENT "{" field: expr, ... "}" *)
|
||||||
|
(* *)
|
||||||
|
(* Field order need not match the struct definition order. *)
|
||||||
|
(* No trailing comma is permitted (consistent with struct_def). *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) notes: *)
|
||||||
|
(* struct_field_list: "}" → ε; IDENT → first field *)
|
||||||
|
(* FIRST(struct_field) = {IDENT} *)
|
||||||
|
(* FOLLOW(struct_field_list) = {"}"} *)
|
||||||
|
(* Disjoint, so no look-ahead conflict. *)
|
||||||
|
|
||||||
|
struct_lit_body = "{" , struct_field_list , "}" ;
|
||||||
|
|
||||||
|
struct_field_list = [ struct_field , { "," , struct_field } ] ;
|
||||||
|
|
||||||
|
struct_field = IDENT , ":" , expr ;
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Argument List *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
arg_list = [ expr , { "," , expr } ] ;
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* No-Struct Expression Hierarchy (expr_ns) *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* *)
|
||||||
|
(* Struct literals create an LL(1) ambiguity in if/while conditions:*)
|
||||||
|
(* if Point { x: 1 } { ... } *)
|
||||||
|
(* After "Point", "{" could open a struct literal OR the body block.*)
|
||||||
|
(* *)
|
||||||
|
(* Solution: define expr_ns — identical to expr except *)
|
||||||
|
(* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
|
||||||
|
(* Struct literals ARE still allowed when parenthesised: *)
|
||||||
|
(* if (Point { x: 1 }).flag { ... } *)
|
||||||
|
(* *)
|
||||||
|
(* if_stmt and while_stmt use expr_ns for their condition. *)
|
||||||
|
(* All other expression positions use the full expr. *)
|
||||||
|
|
||||||
|
expr_ns = or_expr_ns ;
|
||||||
|
|
||||||
|
or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;
|
||||||
|
and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
|
||||||
|
|
||||||
|
bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
|
||||||
|
bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
|
||||||
|
bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
|
||||||
|
|
||||||
|
additive_expr_ns = multiplicative_expr_ns ,
|
||||||
|
{ ( "+" | "-" ) , multiplicative_expr_ns } ;
|
||||||
|
|
||||||
|
multiplicative_expr_ns = unary_expr_ns ,
|
||||||
|
{ ( "*" | "/" | "%" ) , unary_expr_ns } ;
|
||||||
|
|
||||||
|
unary_expr_ns = "!" , unary_expr_ns
|
||||||
|
| "~" , unary_expr_ns
|
||||||
|
| "-" , unary_expr_ns
|
||||||
|
| "*" , unary_expr_ns
|
||||||
|
| "&" , unary_expr_ns
|
||||||
|
| postfix_expr_ns ;
|
||||||
|
|
||||||
|
postfix_expr_ns = primary_expr_ns , { postfix_op } ;
|
||||||
|
|
||||||
|
(* primary_expr_ns: same as primary_expr but IDENT is never *)
|
||||||
|
(* followed by struct_lit_body. Note "(" , expr , ")" uses full *)
|
||||||
|
(* expr, so struct literals are permitted inside parentheses. *)
|
||||||
|
|
||||||
|
primary_expr_ns = IDENT (* bare ident only *)
|
||||||
|
| INT_LIT
|
||||||
|
| FLOAT_LIT
|
||||||
|
| STRING_LIT
|
||||||
|
| CHAR_LIT
|
||||||
|
| "true"
|
||||||
|
| "false"
|
||||||
|
| "(" , expr , ")" ; (* struct lit OK here *)
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Types *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
type = primitive_type
|
||||||
|
| named_type
|
||||||
|
| pointer_type
|
||||||
|
| array_type ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Primitive types --- *)
|
||||||
|
(* *)
|
||||||
|
(* Unsigned integers : u8 u16 u32 u64 *)
|
||||||
|
(* Signed integers : i8 i16 i32 i64 *)
|
||||||
|
(* Floating-point : f32 f64 *)
|
||||||
|
(* Other : bool char *)
|
||||||
|
|
||||||
|
primitive_type = "u8" | "u16" | "u32" | "u64"
|
||||||
|
| "i8" | "i16" | "i32" | "i64"
|
||||||
|
| "f32" | "f64"
|
||||||
|
| "bool" | "char" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Named types --- *)
|
||||||
|
(* *)
|
||||||
|
(* A user-defined type referenced by its identifier (e.g. a struct *)
|
||||||
|
(* name). The lexer guarantees that all primitive-type keywords are *)
|
||||||
|
(* reserved, so IDENT never clashes with primitive_type. *)
|
||||||
|
|
||||||
|
named_type = IDENT ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Pointer types --- *)
|
||||||
|
(* *)
|
||||||
|
(* "*" type — typed pointer; the pointee type is known. *)
|
||||||
|
(* "*opaque" — untyped/opaque pointer (no pointee type info). *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) note: after "*", "opaque" is not in FIRST(type), so the *)
|
||||||
|
(* two alternatives are always distinguishable with one token. *)
|
||||||
|
|
||||||
|
pointer_type = "*" , ( "opaque" | type ) ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Array types --- *)
|
||||||
|
(* *)
|
||||||
|
(* "[" type ";" INT_LIT "]" *)
|
||||||
|
(* *)
|
||||||
|
(* The element type and the fixed size (a non-negative integer *)
|
||||||
|
(* literal) are separated by ";". Sizes that are constant *)
|
||||||
|
(* expressions may be introduced in a later grammar revision. *)
|
||||||
|
|
||||||
|
array_type = "[" , type , ";" , INT_LIT , "]" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Statements *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
stmt = let_stmt
|
||||||
|
| return_stmt
|
||||||
|
| if_stmt
|
||||||
|
| while_stmt
|
||||||
|
| loop_stmt
|
||||||
|
| break_stmt
|
||||||
|
| continue_stmt
|
||||||
|
| block_stmt
|
||||||
|
| expr_stmt ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Return statement --- *)
|
||||||
|
(* *)
|
||||||
|
(* Exits the enclosing function, optionally yielding a value. *)
|
||||||
|
(* "return ;" is used when the function return type is (). *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1): after "return", peek at next token. *)
|
||||||
|
(* ";" → no expression (unit return) *)
|
||||||
|
(* other → parse expr, then expect ";" *)
|
||||||
|
(* ";" is not in FIRST(expr), so the two cases are unambiguous. *)
|
||||||
|
|
||||||
|
return_stmt = "return" , [ expr ] , ";" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Expression statement --- *)
|
||||||
|
(* *)
|
||||||
|
(* Evaluates an expression for its side effects; the value is *)
|
||||||
|
(* discarded. The ";" is mandatory. *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1): at stmt level: *)
|
||||||
|
(* "let" → let_stmt *)
|
||||||
|
(* "return" → return_stmt *)
|
||||||
|
(* "if" → if_stmt *)
|
||||||
|
(* "while" → while_stmt *)
|
||||||
|
(* "loop" → loop_stmt *)
|
||||||
|
(* "break" → break_stmt *)
|
||||||
|
(* "continue" → continue_stmt *)
|
||||||
|
(* "{" → block_stmt *)
|
||||||
|
(* other → expr_stmt *)
|
||||||
|
|
||||||
|
expr_stmt = expr , ";" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- If statement --- *)
|
||||||
|
(* *)
|
||||||
|
(* Conditionally executes a block. An optional "else" branch may *)
|
||||||
|
(* follow; it is either a plain block or another "if" statement, *)
|
||||||
|
(* enabling "else if" chains of arbitrary length. *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) notes: *)
|
||||||
|
(* condition uses expr_ns — struct literals are forbidden at the *)
|
||||||
|
(* outermost level to avoid ambiguity with the body block's "{". *)
|
||||||
|
(* [ "else" ... ] — consume "else" iff next token is "else" *)
|
||||||
|
(* else_branch: "if" → if_stmt (else-if); "{" → block_stmt *)
|
||||||
|
(* The two else_branch alternatives start with distinct tokens, *)
|
||||||
|
(* so no look-ahead conflict arises (no dangling-else ambiguity). *)
|
||||||
|
|
||||||
|
if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
|
||||||
|
|
||||||
|
else_branch = if_stmt (* else if *)
|
||||||
|
| block_stmt ; (* plain else *)
|
||||||
|
|
||||||
|
|
||||||
|
(* --- While loop --- *)
|
||||||
|
(* *)
|
||||||
|
(* Repeatedly executes the body as long as the condition is true. *)
|
||||||
|
(* The condition is re-evaluated before every iteration. *)
|
||||||
|
(* If the condition is false on the first check, the body never *)
|
||||||
|
(* executes. *)
|
||||||
|
(* *)
|
||||||
|
(* Like if_stmt, the condition uses expr_ns to prevent struct *)
|
||||||
|
(* literal ambiguity with the body block's opening "{". *)
|
||||||
|
|
||||||
|
while_stmt = "while" , expr_ns , block_stmt ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Infinite loop --- *)
|
||||||
|
(* *)
|
||||||
|
(* Executes the body unconditionally and indefinitely. The only *)
|
||||||
|
(* ways to exit are "break" or "return" inside the body. *)
|
||||||
|
|
||||||
|
loop_stmt = "loop" , block_stmt ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Break and continue --- *)
|
||||||
|
(* *)
|
||||||
|
(* "break" exits the immediately enclosing "while" or "loop". *)
|
||||||
|
(* "continue" skips the rest of the current iteration and jumps to *)
|
||||||
|
(* the next condition check (while) or iteration (loop). *)
|
||||||
|
(* Both are only valid inside a loop body; the compiler enforces *)
|
||||||
|
(* this as a semantic rule. *)
|
||||||
|
|
||||||
|
break_stmt = "break" , ";" ;
|
||||||
|
continue_stmt = "continue" , ";" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Block statement --- *)
|
||||||
|
(* *)
|
||||||
|
(* A block groups zero or more statements into a single statement *)
|
||||||
|
(* and introduces a new lexical scope. It does not produce a value. *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1): at stmt level, "{" unambiguously selects block since no *)
|
||||||
|
(* other stmt alternative starts with "{". *)
|
||||||
|
|
||||||
|
block_stmt = "{" , { stmt } , "}" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Let statement --- *)
|
||||||
|
(* *)
|
||||||
|
(* Introduces a named binding in the current scope. *)
|
||||||
|
(* Bindings are immutable by default; "mut" opts into mutability. *)
|
||||||
|
(* *)
|
||||||
|
(* The type annotation and the initialiser are both optional, but *)
|
||||||
|
(* at least one must be present for the binding to be usable; *)
|
||||||
|
(* the compiler enforces this as a semantic (not syntactic) rule. *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) notes: *)
|
||||||
|
(* [ "mut" ] — consume "mut" iff the next token is "mut" *)
|
||||||
|
(* [ ":" ... ] — consume iff next token is ":" *)
|
||||||
|
(* [ "=" ... ] — consume iff next token is "=" *)
|
||||||
|
(* All decision tokens are distinct, so no look-ahead conflict. *)
|
||||||
|
|
||||||
|
let_stmt = "let" , [ "mut" ] , IDENT ,
|
||||||
|
[ ":" , type ] ,
|
||||||
|
[ "=" , expr ] ,
|
||||||
|
";" ;
|
||||||
|
|
||||||
|
|
||||||
|
(* ================================================================ *)
|
||||||
|
(* Top-Level Definitions *)
|
||||||
|
(* ================================================================ *)
|
||||||
|
|
||||||
|
(* --- Function definition --- *)
|
||||||
|
(* *)
|
||||||
|
(* Defines a named function with a typed parameter list and an *)
|
||||||
|
(* optional return type. Omitting "->" implies a return type of (). *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) notes: *)
|
||||||
|
(* param_list: ")" → ε (empty list); else parse first param *)
|
||||||
|
(* param: "mut" → consume; IDENT → skip (mut absent) *)
|
||||||
|
(* [ "->" ... ]: consume iff next token is "->" *)
|
||||||
|
(* "->" is a two-character token; distinct from all stmt-starting *)
|
||||||
|
(* tokens, so no look-ahead conflict with block_stmt that follows *)
|
||||||
|
|
||||||
|
func_def = "fn" , IDENT , "(" , param_list , ")" ,
|
||||||
|
[ "->" , type ] ,
|
||||||
|
block_stmt ;
|
||||||
|
|
||||||
|
param_list = [ param , { "," , param } ] ;
|
||||||
|
|
||||||
|
(* Each parameter is an optionally-mutable name with a required *)
|
||||||
|
(* type annotation. Mutability applies within the function body. *)
|
||||||
|
|
||||||
|
param = [ "mut" ] , IDENT , ":" , type ;
|
||||||
|
|
||||||
|
|
||||||
|
(* --- Struct definition --- *)
|
||||||
|
(* *)
|
||||||
|
(* Defines a named product type with zero or more typed fields. *)
|
||||||
|
(* Fields are separated by commas; no trailing comma is permitted. *)
|
||||||
|
(* *)
|
||||||
|
(* LL(1) notes: *)
|
||||||
|
(* field_list: "}" → ε (empty struct); else parse first field *)
|
||||||
|
(* FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"} *)
|
||||||
|
(* Disjoint, so no look-ahead conflict. *)
|
||||||
|
(* top_level_def: "fn" → func_def; "struct" → struct_def *)
|
||||||
|
|
||||||
|
struct_def = "struct" , IDENT , "{" , field_list , "}" ;
|
||||||
|
|
||||||
|
field_list = [ field , { "," , field } ] ;
|
||||||
|
|
||||||
|
field = IDENT , ":" , type ;
|
||||||
803
SYNTAX.md
Normal file
803
SYNTAX.md
Normal file
@@ -0,0 +1,803 @@
|
|||||||
|
# Flux Language Syntax Reference
|
||||||
|
|
||||||
|
## Lexical Tokens
|
||||||
|
|
||||||
|
All tokens listed here are produced by the lexer (lexical analysis phase) and
|
||||||
|
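appear in `GRAMMAR.ebnf` either as UPPERCASE terminals (token classes such as `IDENT` and `INT_LIT`) or as quoted literals (fixed tokens such as `"+"`, `"fn"`, and `"true"`).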
|
||||||
|
|
||||||
|
### Literals
|
||||||
|
|
||||||
|
| Token | Description | Examples |
|
||||||
|
| ------------ | ------------------------------------------------------------------- | ------------------------------ |
|
||||||
|
| `INT_LIT` | Integer literal (decimal, hex `0x`, octal `0o`, binary `0b`) | `42`, `0xFF`, `0o77`, `0b1010` |
|
||||||
|
| `FLOAT_LIT` | Floating-point literal | `3.14`, `1.0e-9`, `0.5` |
|
||||||
|
| `STRING_LIT` | Double-quoted UTF-8 string, supports `\n \t \\ \"` escape sequences | `"hello\nworld"` |
|
||||||
|
| `CHAR_LIT` | Single-quoted Unicode scalar value | `'a'`, `'\n'`, `'\u{1F600}'` |
|
||||||
|
| `TRUE` | Boolean true literal | `true` |
|
||||||
|
| `FALSE` | Boolean false literal | `false` |
|
||||||
|
|
||||||
|
### Identifier
|
||||||
|
|
||||||
|
| Token | Description |
|
||||||
|
| ------- | ------------------------------------------------------------------------------------------------------------ |
|
||||||
|
| `IDENT` | Identifier: starts with a letter or `_`, followed by letters, digits, or `_`. Unicode letters are permitted. |
|
||||||
|
|
||||||
|
### Operator Tokens
|
||||||
|
|
||||||
|
| Token | Lexeme | Description |
|
||||||
|
| --------- | ------ | -------------------------------------- |
|
||||||
|
| `PLUS` | `+` | Addition (unary plus is not part of the grammar) |
|
||||||
|
| `MINUS` | `-` | Subtraction / unary negation |
|
||||||
|
| `STAR` | `*` | Multiplication / pointer dereference |
|
||||||
|
| `SLASH` | `/` | Division |
|
||||||
|
| `PERCENT` | `%` | Modulo (remainder) |
|
||||||
|
| `AMP` | `&` | Bitwise AND / address-of |
|
||||||
|
| `PIPE` | `\|` | Bitwise OR |
|
||||||
|
| `CARET` | `^` | Bitwise XOR |
|
||||||
|
| `BANG` | `!` | Logical NOT |
|
||||||
|
| `TILDE` | `~` | Bitwise NOT |
|
||||||
|
| `DOT` | `.` | Member access |
|
||||||
|
|
||||||
|
### Keyword Tokens
|
||||||
|
|
||||||
|
#### Operator Keywords
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| ------ | ----------- |
|
||||||
|
| `and` | Logical AND |
|
||||||
|
| `or` | Logical OR |
|
||||||
|
|
||||||
|
#### Boolean Literals
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| ------- | ------------------- |
|
||||||
|
| `true` | Boolean true value |
|
||||||
|
| `false` | Boolean false value |
|
||||||
|
|
||||||
|
#### Primitive Type Keywords
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| ------ | ------------------------------ |
|
||||||
|
| `u8` | Unsigned 8-bit integer |
|
||||||
|
| `u16` | Unsigned 16-bit integer |
|
||||||
|
| `u32` | Unsigned 32-bit integer |
|
||||||
|
| `u64` | Unsigned 64-bit integer |
|
||||||
|
| `i8` | Signed 8-bit integer |
|
||||||
|
| `i16` | Signed 16-bit integer |
|
||||||
|
| `i32` | Signed 32-bit integer |
|
||||||
|
| `i64` | Signed 64-bit integer |
|
||||||
|
| `f32` | 32-bit IEEE 754 floating-point |
|
||||||
|
| `f64` | 64-bit IEEE 754 floating-point |
|
||||||
|
| `bool` | Boolean (`true` or `false`) |
|
||||||
|
| `char` | Unicode scalar value (32-bit) |
|
||||||
|
|
||||||
|
#### Pointer Keyword
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| -------- | ------------------------------------------------------- |
|
||||||
|
| `opaque` | Used in `*opaque` to denote a pointer with no type info |
|
||||||
|
|
||||||
|
#### Statement Keywords
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| ---------- | ------------------------------------- |
|
||||||
|
| `let` | Introduces a variable binding |
|
||||||
|
| `mut` | Marks a `let` binding or function parameter as mutable |
|
||||||
|
| `return` | Exits the enclosing function |
|
||||||
|
| `if` | Conditional statement |
|
||||||
|
| `else` | Alternative branch of an `if` |
|
||||||
|
| `while` | Condition-controlled loop |
|
||||||
|
| `loop` | Infinite loop |
|
||||||
|
| `break` | Exit the immediately enclosing loop |
|
||||||
|
| `continue` | Skip to the next iteration of a loop |
|
||||||
|
|
||||||
|
#### Definition Keywords
|
||||||
|
|
||||||
|
| Lexeme | Description |
|
||||||
|
| -------- | -------------------------------- |
|
||||||
|
| `fn` | Introduces a function definition |
|
||||||
|
| `struct` | Introduces a struct definition |
|
||||||
|
|
||||||
|
> **Lexer note:** All keywords above are reserved and must be recognised before
|
||||||
|
> the general `IDENT` rule. An identifier may not shadow any keyword.
|
||||||
|
|
||||||
|
### Delimiter / Punctuation Tokens
|
||||||
|
|
||||||
|
| Token | Lexeme | Description |
|
||||||
|
| ----------- | ------ | ------------------------------------------------------ |
|
||||||
|
| `LPAREN` | `(` | Left parenthesis |
|
||||||
|
| `RPAREN` | `)` | Right parenthesis |
|
||||||
|
| `LBRACKET` | `[` | Left square bracket |
|
||||||
|
| `RBRACKET` | `]` | Right square bracket |
|
||||||
|
| `COMMA` | `,` | Argument / element separator |
|
||||||
|
| `SEMICOLON` | `;` | Statement terminator / array size separator (`[T; N]`) |
|
||||||
|
| `LCURLY` | `{` | Block / struct definition / struct literal open |
|
||||||
|
| `RCURLY` | `}` | Block / struct definition / struct literal close |
|
||||||
|
| `ARROW` | `->` | Function return type separator |
|
||||||
|
| `COLON` | `:` | Type annotation separator / struct literal field-value separator |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Expressions
|
||||||
|
|
||||||
|
Expressions produce a value. The grammar defines them through a hierarchy of
|
||||||
|
precedence levels — a lower level number means lower precedence (binds less
|
||||||
|
tightly).
|
||||||
|
|
||||||
|
### Operator Precedence Table
|
||||||
|
|
||||||
|
| Level | Operators | Associativity | Description |
|
||||||
|
| ----- | --------------------------- | -------------- | -------------------------------- |
|
||||||
|
| 1 | `or` | left | Logical OR (lowest) |
|
||||||
|
| 2 | `and` | left | Logical AND |
|
||||||
|
| 3 | `\|` | left | Bitwise OR |
|
||||||
|
| 4 | `^` | left | Bitwise XOR |
|
||||||
|
| 5 | `&` | left | Bitwise AND |
|
||||||
|
| 6 | `+` `-` | left | Addition, subtraction |
|
||||||
|
| 7 | `*` `/` `%` | left | Multiplication, division, modulo |
|
||||||
|
| 8 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators |
|
||||||
|
| 9 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call |
|
||||||
|
| 10 | literals, identifiers, `()` | — | Primary expressions (highest) |
|
||||||
|
|
||||||
|
### Operator Descriptions
|
||||||
|
|
||||||
|
#### Binary Operators
|
||||||
|
|
||||||
|
| Operator | Name | Example | Notes |
|
||||||
|
| -------- | -------------- | --------- | -------------------------------------------- |
|
||||||
|
| `or` | Logical OR | `a or b` | Short-circuits; both operands must be `bool` |
|
||||||
|
| `and` | Logical AND | `a and b` | Short-circuits; both operands must be `bool` |
|
||||||
|
| `\|` | Bitwise OR | `a \| b` | Integer types |
|
||||||
|
| `^` | Bitwise XOR | `a ^ b` | Integer types |
|
||||||
|
| `&` | Bitwise AND | `a & b` | Integer types (binary context) |
|
||||||
|
| `+` | Addition | `a + b` | |
|
||||||
|
| `-` | Subtraction | `a - b` | |
|
||||||
|
| `*` | Multiplication | `a * b` | Binary context (both operands are values) |
|
||||||
|
| `/` | Division | `a / b` | Integer division truncates toward zero |
|
||||||
|
| `%` | Modulo | `a % b` | Sign follows the dividend |
|
||||||
|
|
||||||
|
#### Unary Prefix Operators
|
||||||
|
|
||||||
|
| Operator | Name | Example | Notes |
|
||||||
|
| -------- | ----------- | ------- | ------------------------------------------------ |
|
||||||
|
| `!` | Logical NOT | `!cond` | Operand must be `bool` |
|
||||||
|
| `~` | Bitwise NOT | `~mask` | Bitwise complement; integer types |
|
||||||
|
| `-` | Negation | `-x` | Arithmetic negation |
|
||||||
|
| `*` | Dereference | `*ptr` | Unary context; operand must be a pointer type |
|
||||||
|
| `&` | Address-of | `&x` | Unary context; produces a pointer to the operand |
|
||||||
|
|
||||||
|
#### Postfix Operators
|
||||||
|
|
||||||
|
| Operator | Name | Example | Notes |
|
||||||
|
| -------- | ------------- | ----------- | ------------------------------------------------- |
|
||||||
|
| `.` | Member access | `obj.field` | Accesses a named field or method of a struct/type |
|
||||||
|
| `[…]` | Subscript | `arr[i]` | Indexes into an array, slice, or map |
|
||||||
|
| `(…)` | Call | `f(a, b)` | Invokes a function or closure |
|
||||||
|
|
||||||
|
> **Disambiguation:** `*` and `&` are context-sensitive.
|
||||||
|
> When appearing as the first token of a `unary_expr` they are **unary**
|
||||||
|
> (dereference / address-of). When appearing between two operand
|
||||||
|
> sub-trees inside `multiplicative_expr` or `bitand_expr` they are **binary**
|
||||||
|
> (multiplication / bitwise AND). The parser resolves this purely from
|
||||||
|
> grammatical position — no look-ahead beyond 1 token is required.
|
||||||
|
|
||||||
|
### Parenthesised Expressions
|
||||||
|
|
||||||
|
Any expression may be wrapped in parentheses to override default precedence:
|
||||||
|
|
||||||
|
```
|
||||||
|
(a + b) * c
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Call Argument List
|
||||||
|
|
||||||
|
Arguments are comma-separated expressions. A trailing comma is **not**
|
||||||
|
permitted at this grammar level.
|
||||||
|
|
||||||
|
```
|
||||||
|
f()
|
||||||
|
f(x)
|
||||||
|
f(x, y, z)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```flux
|
||||||
|
// Arithmetic
|
||||||
|
a + b * c - d % 2
|
||||||
|
|
||||||
|
// Bitwise
|
||||||
|
flags & MASK | extra ^ toggle
|
||||||
|
|
||||||
|
// Logical
|
||||||
|
ready and not_done or fallback
|
||||||
|
|
||||||
|
// Mixed unary / postfix
|
||||||
|
*ptr.field
|
||||||
|
&arr[i]
|
||||||
|
!cond
|
||||||
|
|
||||||
|
// Chained postfix
|
||||||
|
obj.method(arg1, arg2)[0].name
|
||||||
|
|
||||||
|
// Explicit precedence override
|
||||||
|
(a or b) and c
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Types
|
||||||
|
|
||||||
|
Types describe the shape and interpretation of values. All type positions in
|
||||||
|
the grammar reference the `type` non-terminal.
|
||||||
|
|
||||||
|
### Primitive Types
|
||||||
|
|
||||||
|
Primitive types are single-keyword types built into the language.
|
||||||
|
|
||||||
|
| Type | Kind | Width | Range / Notes |
|
||||||
|
| ------ | ---------------- | ------ | ------------------------------------------ |
|
||||||
|
| `u8` | Unsigned integer | 8-bit | 0 … 255 |
|
||||||
|
| `u16` | Unsigned integer | 16-bit | 0 … 65 535 |
|
||||||
|
| `u32` | Unsigned integer | 32-bit | 0 … 4 294 967 295 |
|
||||||
|
| `u64` | Unsigned integer | 64-bit | 0 … 2⁶⁴ − 1 |
|
||||||
|
| `i8` | Signed integer | 8-bit | −128 … 127 |
|
||||||
|
| `i16` | Signed integer | 16-bit | −32 768 … 32 767 |
|
||||||
|
| `i32` | Signed integer | 32-bit | −2 147 483 648 … 2 147 483 647 |
|
||||||
|
| `i64` | Signed integer | 64-bit | −2⁶³ … 2⁶³ − 1 |
|
||||||
|
| `f32` | Floating-point | 32-bit | IEEE 754 single precision |
|
||||||
|
| `f64` | Floating-point | 64-bit | IEEE 754 double precision |
|
||||||
|
| `bool` | Boolean | 1 byte | `true` or `false` |
|
||||||
|
| `char` | Unicode scalar | 32-bit | Any Unicode scalar value (not a surrogate) |
|
||||||
|
|
||||||
|
### Named Types
|
||||||
|
|
||||||
|
A named type is any user-defined type referenced by its identifier — typically a struct name. Because all primitive-type keywords (`u8`, `bool`, etc.) are reserved, an `IDENT` in type position is always a named type, never a primitive.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
Point // struct Point { x: f32, y: f32 }
|
||||||
|
Node // struct Node { value: i64, next: *Node }
|
||||||
|
*Point // pointer to a named type
|
||||||
|
[Node; 8] // array of a named type
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pointer Types
|
||||||
|
|
||||||
|
A pointer type is written with a leading `*`.
|
||||||
|
|
||||||
|
| Syntax | Description |
|
||||||
|
| --------- | ------------------------------------------------------------------------------------- |
|
||||||
|
| `*T` | Typed pointer — points to a value of type `T` |
|
||||||
|
| `*opaque` | Opaque pointer — no compile-time pointee type information; equivalent to C's `void *` |
|
||||||
|
|
||||||
|
Pointer types may be nested: `**u8` is a pointer to a pointer to `u8`.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
*u8 // pointer to u8
|
||||||
|
**i32 // pointer to pointer to i32
|
||||||
|
*opaque // untyped pointer
|
||||||
|
**opaque // pointer to untyped pointer
|
||||||
|
```
|
||||||
|
|
||||||
|
### Array Types
|
||||||
|
|
||||||
|
Arrays have a fixed size known at compile time.
|
||||||
|
|
||||||
|
```
|
||||||
|
[ <element-type> ; <size> ]
|
||||||
|
```
|
||||||
|
|
||||||
|
`<size>` must be a non-negative integer literal (`INT_LIT`). The element type
|
||||||
|
may itself be any `type`, including pointers or nested arrays.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
[u8; 256] // array of 256 u8 values
|
||||||
|
[*u8; 4] // array of 4 pointers to u8
|
||||||
|
[[f32; 3]; 3] // 3×3 matrix of f32 (array of arrays)
|
||||||
|
[*opaque; 8] // array of 8 opaque pointers
|
||||||
|
```
|
||||||
|
|
||||||
|
### Type Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
type = primitive_type | named_type | pointer_type | array_type ;
|
||||||
|
primitive_type = "u8" | "u16" | "u32" | "u64"
|
||||||
|
| "i8" | "i16" | "i32" | "i64"
|
||||||
|
| "f32" | "f64" | "bool" | "char" ;
|
||||||
|
named_type = IDENT ;
|
||||||
|
pointer_type = "*" , ( "opaque" | type ) ;
|
||||||
|
array_type = "[" , type , ";" , INT_LIT , "]" ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Struct Literals
|
||||||
|
|
||||||
|
A struct literal constructs a value of a named struct type by providing values for each field.
|
||||||
|
|
||||||
|
```
|
||||||
|
<TypeName> { <field>: <expr>, ... }
|
||||||
|
```
|
||||||
|
|
||||||
|
Fields may appear in any order and need not match the declaration order. No trailing comma is permitted.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```flux
|
||||||
|
let p = Point { x: 1.0, y: 2.0 };
|
||||||
|
|
||||||
|
let n = Node {
|
||||||
|
value: 42,
|
||||||
|
next: get_next()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Nested struct literal
|
||||||
|
let outer = Rect {
|
||||||
|
origin: Point { x: 0.0, y: 0.0 },
|
||||||
|
size: Point { x: 10.0, y: 5.0 }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Empty struct
|
||||||
|
let u = Unit {};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Struct Literals in Conditions
|
||||||
|
|
||||||
|
Struct literals are **not permitted** in `if` and `while` conditions unless they are enclosed in parentheses. This restriction exists because a `{` that follows an identifier in condition position is ambiguous — it could start a struct literal body or the statement block.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
// ERROR — ambiguous: is `{` a struct body or the if block?
|
||||||
|
if Flags { verbose: true } { ... }
|
||||||
|
|
||||||
|
// OK — parentheses resolve the ambiguity
|
||||||
|
if (Flags { verbose: true }).verbose { ... }
|
||||||
|
```
|
||||||
|
|
||||||
|
The grammar enforces this through the `expr_ns` (no-struct) hierarchy used in condition positions. Struct literals remain valid everywhere else: `let`, `return`, function arguments, field values, etc.
|
||||||
|
|
||||||
|
### Struct Literal Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
primary_expr = IDENT , [ struct_lit_body ] | INT_LIT | FLOAT_LIT
|
||||||
|
| STRING_LIT | CHAR_LIT | "true" | "false"
|
||||||
|
| "(" , expr , ")" ;
|
||||||
|
struct_lit_body = "{" , struct_field_list , "}" ;
|
||||||
|
struct_field_list = [ struct_field , { "," , struct_field } ] ;
|
||||||
|
struct_field = IDENT , ":" , expr ;
|
||||||
|
```
|
||||||
|
|
||||||
|
### No-Struct Expression (`expr_ns`)
|
||||||
|
|
||||||
|
`expr_ns` is a parallel expression hierarchy identical to `expr` except its primary level (`primary_expr_ns`) does not allow the `struct_lit_body` suffix after an `IDENT`. Struct literals are still permitted when enclosed in parentheses (`"(" , expr , ")"`), because the `(` unambiguously marks the start of a grouped expression.
|
||||||
|
|
||||||
|
`if_stmt` and `while_stmt` use `expr_ns` for their condition; all other expression positions use the full `expr`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Statements
|
||||||
|
|
||||||
|
Statements perform an action and do not produce a value. Each statement is
|
||||||
|
terminated by a semicolon `;`, except those that end in a block (`if`, `while`, `loop`, and bare blocks).
|
||||||
|
|
||||||
|
### Let Statement
|
||||||
|
|
||||||
|
Introduces a new named binding in the current scope.
|
||||||
|
|
||||||
|
```
|
||||||
|
let [mut] <name> [: <type>] [= <expr>] ;
|
||||||
|
```
|
||||||
|
|
||||||
|
| Part | Required | Description |
|
||||||
|
| ---------- | -------- | --------------------------------------------- |
|
||||||
|
| `mut` | no | Makes the binding mutable; omit for immutable |
|
||||||
|
| `<name>` | yes | The identifier being bound |
|
||||||
|
| `: <type>` | no | Explicit type annotation |
|
||||||
|
| `= <expr>` | no | Initialiser expression |
|
||||||
|
| `;` | yes | Statement terminator |
|
||||||
|
|
||||||
|
Bindings are **immutable by default**. Attempting to assign to a binding
|
||||||
|
declared without `mut` is a compile-time error.
|
||||||
|
|
||||||
|
At least one of the type annotation or the initialiser must be present so the
|
||||||
|
compiler can determine the binding's type. This is a semantic constraint, not a
|
||||||
|
syntactic one — the grammar permits bare `let x;` and the type checker rejects
|
||||||
|
it if no type can be inferred from context.
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
```flux
|
||||||
|
// Immutable, type inferred from initialiser
|
||||||
|
let x = 42;
|
||||||
|
|
||||||
|
// Immutable, explicit type
|
||||||
|
let y: f64 = 3.14;
|
||||||
|
|
||||||
|
// Mutable, type inferred
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
// Mutable, explicit type, no initialiser (must be assigned before use)
|
||||||
|
let mut buf: [u8; 128];
|
||||||
|
|
||||||
|
// Mutable pointer to u32
|
||||||
|
let mut ptr: *u32 = &value;
|
||||||
|
|
||||||
|
// Shadowing a previous binding is allowed
|
||||||
|
let x = "hello"; // x is now a string, previous x is gone
|
||||||
|
```
|
||||||
|
|
||||||
|
### Return Statement
|
||||||
|
|
||||||
|
Exits the enclosing function immediately, optionally producing a return value.
|
||||||
|
|
||||||
|
```
|
||||||
|
return [<expr>] ;
|
||||||
|
```
|
||||||
|
|
||||||
|
`return;` (no expression) is used when the function's return type is the unit
|
||||||
|
type `()`. `return <expr>;` returns the value of the expression.
|
||||||
|
|
||||||
|
A function whose return type is `()` needs an explicit `return` only for early
|
||||||
|
exits. Any other function returns its value with `return <expr>;` — blocks do not produce a value, so there is no implicit return from the body block.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
return; // unit return
|
||||||
|
return 42; // return an integer
|
||||||
|
return x * 2 + 1; // return an expression
|
||||||
|
```
|
||||||
|
|
||||||
|
### Expression Statement
|
||||||
|
|
||||||
|
Evaluates an expression for its side effects; the resulting value is
|
||||||
|
discarded. A semicolon is required.
|
||||||
|
|
||||||
|
```
|
||||||
|
<expr> ;
|
||||||
|
```
|
||||||
|
|
||||||
|
```flux
|
||||||
|
do_something(x); // call for side effects
|
||||||
|
count + 1; // legal but silly — value discarded
|
||||||
|
```
|
||||||
|
|
||||||
|
### Statement Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
stmt = let_stmt | return_stmt | if_stmt
|
||||||
|
| while_stmt | loop_stmt | break_stmt | continue_stmt
|
||||||
|
| block_stmt | expr_stmt ;
|
||||||
|
let_stmt = "let" , [ "mut" ] , IDENT , [ ":" , type ] , [ "=" , expr ] , ";" ;
|
||||||
|
return_stmt = "return" , [ expr ] , ";" ;
|
||||||
|
if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
|
||||||
|
else_branch = if_stmt | block_stmt ;
|
||||||
|
while_stmt = "while" , expr_ns , block_stmt ;
|
||||||
|
loop_stmt = "loop" , block_stmt ;
|
||||||
|
break_stmt = "break" , ";" ;
|
||||||
|
continue_stmt = "continue" , ";" ;
|
||||||
|
block_stmt = "{" , { stmt } , "}" ;
|
||||||
|
expr_stmt = expr , ";" ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## If Statement
|
||||||
|
|
||||||
|
Conditionally executes a block based on a boolean expression.
|
||||||
|
|
||||||
|
```
|
||||||
|
if <cond> <block> [else <else-branch>]
|
||||||
|
```
|
||||||
|
|
||||||
|
The condition `<cond>` must be an expression of type `bool`. The body is
|
||||||
|
always a `block_stmt` — braces are mandatory.
|
||||||
|
|
||||||
|
### Else Branch
|
||||||
|
|
||||||
|
The optional `else` branch is either a plain block or another `if` statement,
|
||||||
|
enabling `else if` chains of arbitrary length.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
if x > 0 {
|
||||||
|
pos();
|
||||||
|
}
|
||||||
|
|
||||||
|
if x > 0 {
|
||||||
|
pos();
|
||||||
|
} else {
|
||||||
|
non_pos();
|
||||||
|
}
|
||||||
|
|
||||||
|
if x > 0 {
|
||||||
|
pos();
|
||||||
|
} else if x < 0 {
|
||||||
|
neg();
|
||||||
|
} else {
|
||||||
|
zero();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### If Statement Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
|
||||||
|
else_branch = if_stmt | block_stmt ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## While Loop
|
||||||
|
|
||||||
|
Repeatedly executes a block as long as a boolean condition holds. The
|
||||||
|
condition is tested before each iteration; if it is false on entry, the body
|
||||||
|
never runs.
|
||||||
|
|
||||||
|
```
|
||||||
|
while <cond> <block>
|
||||||
|
```
|
||||||
|
|
||||||
|
```flux
|
||||||
|
let mut i = 0;
|
||||||
|
while i < 10 {
|
||||||
|
process(i);
|
||||||
|
i = i + 1;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### While Loop Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
while_stmt = "while" , expr_ns , block_stmt ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Loop
|
||||||
|
|
||||||
|
Executes a block unconditionally and indefinitely. The loop runs until a
|
||||||
|
`break` or `return` inside the body transfers control out.
|
||||||
|
|
||||||
|
```
|
||||||
|
loop <block>
|
||||||
|
```
|
||||||
|
|
||||||
|
```flux
|
||||||
|
loop {
|
||||||
|
let msg = recv();
|
||||||
|
if msg.is_quit() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
handle(msg);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Loop Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
loop_stmt = "loop" , block_stmt ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Break and Continue
|
||||||
|
|
||||||
|
`break` and `continue` are only valid inside the body of a `while` or `loop`.
|
||||||
|
The compiler enforces this as a semantic rule.
|
||||||
|
|
||||||
|
| Statement | Effect |
|
||||||
|
| ------------ | -------------------------------------------------------------- |
|
||||||
|
| `break ;` | Exits the innermost enclosing loop immediately |
|
||||||
|
| `continue ;` | Skips the rest of the current iteration; jumps to the next one |
|
||||||
|
|
||||||
|
For `while`, `continue` jumps back to the condition check. For `loop`,
|
||||||
|
`continue` jumps back to the top of the body.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
let mut i = 0;
|
||||||
|
while i < 20 {
|
||||||
|
i = i + 1;
|
||||||
|
if i % 2 == 0 {
|
||||||
|
continue; // skip even numbers
|
||||||
|
}
|
||||||
|
if i > 15 {
|
||||||
|
break; // stop after 15
|
||||||
|
}
|
||||||
|
process(i);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Break / Continue Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
break_stmt = "break" , ";" ;
|
||||||
|
continue_stmt = "continue" , ";" ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Block Statement
|
||||||
|
|
||||||
|
A block groups zero or more statements into a single statement and introduces
|
||||||
|
a new lexical scope. Blocks do not produce a value.
|
||||||
|
|
||||||
|
```
|
||||||
|
{ <stmt>* }
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scoping
|
||||||
|
|
||||||
|
Bindings declared inside a block are not visible outside it. A binding in an
|
||||||
|
inner scope may shadow a name from an outer scope without affecting it.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
let x = 1;
|
||||||
|
{
|
||||||
|
let x = 2; // shadows outer x inside this block only
|
||||||
|
f(x); // uses 2
|
||||||
|
}
|
||||||
|
// x is still 1 here
|
||||||
|
```
|
||||||
|
|
||||||
|
### Nesting
|
||||||
|
|
||||||
|
Blocks may be nested freely to any depth.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
{
|
||||||
|
let a = compute_a();
|
||||||
|
{
|
||||||
|
let b = compute_b();
|
||||||
|
use(a, b);
|
||||||
|
}
|
||||||
|
// b is no longer in scope here
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Block Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
block_stmt = "{" , { stmt } , "}" ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Top-Level Definitions
|
||||||
|
|
||||||
|
A Flux source file is a sequence of top-level definitions.
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
program = { top_level_def } ;
|
||||||
|
top_level_def = func_def | struct_def ;
|
||||||
|
```
|
||||||
|
|
||||||
|
The leading token unambiguously selects the definition kind: `fn` → function,
|
||||||
|
`struct` → struct.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Function Definition
|
||||||
|
|
||||||
|
Defines a named, callable function.
|
||||||
|
|
||||||
|
```
|
||||||
|
fn <name> ( [<params>] ) [-> <return-type>] <block>
|
||||||
|
```
|
||||||
|
|
||||||
|
| Part | Required | Description |
|
||||||
|
| ------------------ | -------- | -------------------------------------------------------- |
|
||||||
|
| `<name>` | yes | The function's identifier |
|
||||||
|
| `( [<params>] )` | yes | Comma-separated parameter list, may be empty |
|
||||||
|
| `-> <return-type>` | no | Return type; omitting it means the function returns `()` |
|
||||||
|
| `<block>` | yes | Function body — a `block_stmt` |
|
||||||
|
|
||||||
|
### Parameters
|
||||||
|
|
||||||
|
Each parameter is a name with a mandatory type annotation. Parameters are
|
||||||
|
immutable by default; `mut` makes the local binding mutable within the body.
|
||||||
|
|
||||||
|
```
|
||||||
|
[mut] <name> : <type>
|
||||||
|
```
|
||||||
|
|
||||||
|
```flux
|
||||||
|
fn add(a: i32, b: i32) -> i32 {
|
||||||
|
return a + b;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn greet(name: *u8) {
|
||||||
|
print(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn increment(mut x: i32) -> i32 {
|
||||||
|
x = x + 1;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply(f: *opaque, mut buf: [u8; 64]) -> bool {
|
||||||
|
return call(f, &buf);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Return Type
|
||||||
|
|
||||||
|
If `->` is omitted the return type is implicitly `()` (the unit type). An
|
||||||
|
explicit `-> ()` is also permitted but redundant.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
fn do_work() { // returns ()
|
||||||
|
side_effect();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_value() -> i64 { // returns i64
|
||||||
|
return 42;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Definition Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
func_def = "fn" , IDENT , "(" , param_list , ")" , [ "->" , type ] , block_stmt ;
|
||||||
|
param_list = [ param , { "," , param } ] ;
|
||||||
|
param = [ "mut" ] , IDENT , ":" , type ;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Struct Definition
|
||||||
|
|
||||||
|
Defines a named product type with zero or more typed fields.
|
||||||
|
|
||||||
|
```
|
||||||
|
struct <name> {
|
||||||
|
<field>: <type>,
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Fields are separated by commas. No trailing comma is permitted. An empty
|
||||||
|
struct (zero fields) is valid.
|
||||||
|
|
||||||
|
### Fields
|
||||||
|
|
||||||
|
Each field is a name and a type. Fields may be of any type including pointers,
|
||||||
|
arrays, and other structs. Field names must be unique within the struct.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
struct Point {
|
||||||
|
x: f32,
|
||||||
|
y: f32
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Node {
|
||||||
|
value: i64,
|
||||||
|
next: *Node
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Buffer {
|
||||||
|
data: *u8,
|
||||||
|
len: u64,
|
||||||
|
cap: u64
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Unit {}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Member Access
|
||||||
|
|
||||||
|
Fields of a struct value are accessed with the `.` operator (defined in the
|
||||||
|
expression grammar). If the value is behind a pointer, dereference it first
|
||||||
|
with `*`.
|
||||||
|
|
||||||
|
```flux
|
||||||
|
let p: Point = make_point();
|
||||||
|
let x = p.x;
|
||||||
|
|
||||||
|
let ptr: *Point = get_point_ptr();
|
||||||
|
let y = (*ptr).y;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Struct Definition Grammar Summary
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
struct_def = "struct" , IDENT , "{" , field_list , "}" ;
|
||||||
|
field_list = [ field , { "," , field } ] ;
|
||||||
|
field = IDENT , ":" , type ;
|
||||||
|
```
|
||||||
7
examples/fibonacci.flx
Normal file
7
examples/fibonacci.flx
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
// Recursive Fibonacci: returns `n` itself when n < 2, otherwise the sum of the two preceding values.
fn fibonacci(n: u8) -> u64 {
|
||||||
|
if n < 2 {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
return fibonacci(n - 1) + fibonacci(n - 2);
|
||||||
|
}
|
||||||
362
ll1_check.py
Normal file
362
ll1_check.py
Normal file
@@ -0,0 +1,362 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
ll1_check.py — Parse GRAMMAR.ebnf and verify the LL(1) property.
|
||||||
|
|
||||||
|
Usage: python ll1_check.py [grammar_file] [-v]
|
||||||
|
|
||||||
|
Algorithm
|
||||||
|
---------
|
||||||
|
1. Strip (* … *) comments; tokenise.
|
||||||
|
2. Parse ISO/IEC 14977 EBNF into an AST.
|
||||||
|
3. Normalise to plain BNF by introducing fresh helper non-terminals:
|
||||||
|
{ body } → _repN where _repN = body , _repN | ε
|
||||||
|
[ body ] → _optN where _optN = body | ε
|
||||||
|
( body ) → _grpN where _grpN = alt1 | alt2 | … (only for an alternation inside a sequence; any other group is inlined)
|
||||||
|
4. Compute FIRST and FOLLOW sets (fixed-point iteration).
|
||||||
|
5. For each non-terminal compute PREDICT sets; flag pairwise conflicts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from itertools import count as _count
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
EPSILON = 'ε'  # empty-string marker: flags nullability in FIRST sets and is printed for empty productions
|
||||||
|
START = 'program' # grammar start symbol; main() passes it to compute_follow, which seeds its FOLLOW set with '$'
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 1. Tokenise
|
||||||
|
|
||||||
|
_TOK = re.compile(
    r'"(?:[^"\\]|\\.)*"'      # "quoted terminal string"
    r'|[A-Z][A-Z0-9_]*'       # UPPERCASE token class (terminal)
    r'|[a-z][a-z0-9_]*'       # lowercase identifier (non-terminal)
    r'|[=;,|()\[\]{}]'        # single-char punctuation
)

# Text that separates tokens but produces none: whitespace and (* … *) comments.
_SKIP = re.compile(r'\s+|\(\*.*?\*\)', re.DOTALL)


def tokenise(src: str) -> list:
    """
    Split EBNF source text into a flat list of token strings.

    The text is scanned left to right in a single pass.  Whitespace and
    (* … *) comments are skipped; everything else must match _TOK.
    Because quoted terminals are matched as whole tokens, a terminal that
    happens to contain "(*" or "*)" is kept intact instead of being
    mistaken for a comment delimiter.

    Raises
    ------
    SyntaxError
        If a character is neither skippable nor the start of a token
        (this includes an unterminated comment or string).  Silently
        dropping such characters would let typos in the grammar go
        unnoticed by the LL(1) check.
    """
    tokens = []
    pos = 0
    end = len(src)
    while pos < end:
        skipped = _SKIP.match(src, pos)
        if skipped:
            pos = skipped.end()
            continue
        tok = _TOK.match(src, pos)
        if tok is None:
            line = src.count('\n', 0, pos) + 1
            raise SyntaxError(
                f'unrecognised character {src[pos]!r} on line {line}')
        tokens.append(tok.group())
        pos = tok.end()
    return tokens
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 2. Parse EBNF → AST
|
||||||
|
#
|
||||||
|
# Each AST node is a tuple:
|
||||||
|
# ('lit', s) terminal — quoted string "…" or UPPERCASE token class
|
||||||
|
# ('nt', s) non-terminal reference
|
||||||
|
# ('seq', [...]) concatenation (A , B , C)
|
||||||
|
# ('alt', [...]) alternation (A | B | C)
|
||||||
|
# ('opt', node) optional [ … ]
|
||||||
|
# ('rep', node) repetition { … }
|
||||||
|
|
||||||
|
class _Parser:
|
||||||
|
def __init__(self, tokens):
|
||||||
|
self._t = tokens
|
||||||
|
self._i = 0
|
||||||
|
|
||||||
|
def _peek(self):
|
||||||
|
return self._t[self._i] if self._i < len(self._t) else None
|
||||||
|
|
||||||
|
def _eat(self, expected=None):
|
||||||
|
v = self._t[self._i]; self._i += 1
|
||||||
|
if expected and v != expected:
|
||||||
|
raise SyntaxError(f'expected {expected!r}, got {v!r} '
|
||||||
|
f'(token #{self._i - 1})')
|
||||||
|
return v
|
||||||
|
|
||||||
|
def parse_grammar(self) -> dict:
|
||||||
|
rules = {}
|
||||||
|
while self._i < len(self._t):
|
||||||
|
name = self._eat()
|
||||||
|
self._eat('=')
|
||||||
|
rules[name] = self._body()
|
||||||
|
self._eat(';')
|
||||||
|
return rules
|
||||||
|
|
||||||
|
def _body(self):
|
||||||
|
alts = [self._seq()]
|
||||||
|
while self._peek() == '|':
|
||||||
|
self._eat()
|
||||||
|
alts.append(self._seq())
|
||||||
|
return alts[0] if len(alts) == 1 else ('alt', alts)
|
||||||
|
|
||||||
|
def _seq(self):
|
||||||
|
items = [self._atom()]
|
||||||
|
while self._peek() == ',':
|
||||||
|
self._eat()
|
||||||
|
items.append(self._atom())
|
||||||
|
return items[0] if len(items) == 1 else ('seq', items)
|
||||||
|
|
||||||
|
def _atom(self):
|
||||||
|
t = self._peek()
|
||||||
|
if t == '[':
|
||||||
|
self._eat(); b = self._body(); self._eat(']')
|
||||||
|
return ('opt', b)
|
||||||
|
if t == '{':
|
||||||
|
self._eat(); b = self._body(); self._eat('}')
|
||||||
|
return ('rep', b)
|
||||||
|
if t == '(':
|
||||||
|
self._eat(); b = self._body(); self._eat(')')
|
||||||
|
return b # group — return inner node directly
|
||||||
|
if t and (t[0] == '"' or t[0].isupper()):
|
||||||
|
return ('lit', self._eat())
|
||||||
|
if t and t[0].islower():
|
||||||
|
return ('nt', self._eat())
|
||||||
|
raise SyntaxError(f'unexpected token {t!r}')
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 3. Normalise
|
||||||
|
|
||||||
|
def normalise(ebnf: dict) -> tuple:
    """
    Lower an EBNF AST to plain BNF productions.

    Optional, repeated and (inside a sequence) alternated sub-expressions
    are replaced by freshly named helper non-terminals `_optN`, `_repN`
    and `_grpN`, numbered from one shared counter.

    Returns
    -------
    (bnf, origins)
        bnf maps every rule or helper name to a list of productions, each
        a list of symbol strings ([] is the ε production).  origins maps
        each helper name to the named rule whose body produced it.
    """
    productions: dict = {}
    helper_origin: dict = {}
    serial = _count()

    def new_helper(kind: str, owner: str) -> str:
        # Allocate the next helper name and remember which rule it came from.
        helper = f'_{kind}{next(serial)}'
        helper_origin[helper] = owner
        return helper

    def alternatives(children, owner: str) -> list:
        # Concatenate the productions of every alternative, in source order.
        flat = []
        for child in children:
            flat.extend(expand(child, owner, in_seq=False))
        return flat

    def expand(node, rule: str, in_seq: bool = False) -> list:
        """
        Return the alternative symbol sequences that `node` stands for.

        With in_seq=True an 'alt' node is hidden behind a `_grpN` helper
        rather than multiplied out, so A , (B | C) , D stays a single
        production instead of two that share the prefix A.
        """
        kind = node[0]

        if kind in ('lit', 'nt'):
            return [[node[1]]]

        if kind == 'seq':
            # Cross-product of the children; alternations among them are
            # wrapped in helpers because they are expanded with in_seq=True.
            partial = [[]]
            for child in node[1]:
                tails = expand(child, rule, in_seq=True)
                partial = [head + tail for head in partial for tail in tails]
            return partial

        if kind == 'alt':
            if not in_seq:
                # Top level of a body: the alternatives are the productions.
                return alternatives(node[1], rule)
            # The helper is named before its alternatives are expanded so
            # that outer helpers receive lower numbers than nested ones.
            helper = new_helper('grp', rule)
            productions[helper] = alternatives(node[1], rule)
            return [[helper]]

        if kind == 'opt':
            # [ body ]  →  _optN = body | ε
            helper = new_helper('opt', rule)
            productions[helper] = expand(node[1], rule) + [[]]
            return [[helper]]

        if kind == 'rep':
            # { body }  →  _repN = body , _repN | ε
            helper = new_helper('rep', rule)
            once = expand(node[1], rule)
            productions[helper] = [seq + [helper] for seq in once] + [[]]
            return [[helper]]

        raise ValueError(f'unknown AST tag {kind!r}')

    for name, node in ebnf.items():
        productions[name] = expand(node, name)

    return productions, helper_origin
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 4. FIRST / FOLLOW
|
||||||
|
|
||||||
|
def first_of_seq(seq: list, first: dict, bnf: dict) -> set:
    """
    Compute FIRST for a sequence of grammar symbols.

    A symbol that is not a key of `bnf` is a terminal.  The result holds
    terminal strings, plus EPSILON when every symbol in `seq` is nullable
    (which includes the empty sequence).
    """
    terminals = set()
    for symbol in seq:
        if symbol not in bnf:
            # A terminal starts the sequence from here and is never nullable.
            terminals.add(symbol)
            break
        known = first[symbol]
        terminals.update(t for t in known if t != EPSILON)
        if EPSILON not in known:
            break               # non-nullable symbol: later ones are unreachable
    else:
        # The loop ran to completion, so the whole sequence can vanish.
        terminals.add(EPSILON)
    return terminals
|
||||||
|
|
||||||
|
|
||||||
|
def compute_first(bnf: dict) -> dict:
    """
    Compute the FIRST set of every non-terminal by fixed-point iteration.

    Returns a defaultdict mapping names to sets of terminals (and EPSILON
    for nullable names).  Passes over all productions are repeated until
    one of them adds nothing.
    """
    first = defaultdict(set)
    while True:
        grew = False
        for name, prods in bnf.items():
            for prod in prods:
                missing = first_of_seq(prod, first, bnf) - first[name]
                if missing:
                    first[name] |= missing
                    grew = True
        if not grew:
            return first
|
||||||
|
|
||||||
|
|
||||||
|
def compute_follow(bnf: dict, first: dict, start: str) -> dict:
    """
    Compute the FOLLOW set of every non-terminal by fixed-point iteration.

    FOLLOW(start) is seeded with the end marker '$'.  For each occurrence
    of a non-terminal inside a production, the FIRST set of what follows
    it is added, and, if that remainder is nullable, so is the FOLLOW set
    of the production's own left-hand side.
    """
    follow = defaultdict(set)
    follow[start].add('$')
    stable = False
    while not stable:
        stable = True
        for name, prods in bnf.items():
            for prod in prods:
                for idx, sym in enumerate(prod):
                    if sym not in bnf:
                        continue        # terminals have no FOLLOW set
                    tail_first = first_of_seq(prod[idx + 1:], first, bnf)
                    target = follow[sym]
                    size_before = len(target)
                    target |= tail_first - {EPSILON}
                    if EPSILON in tail_first:
                        # Nothing mandatory follows sym here, so whatever
                        # can follow the left-hand side can follow sym too.
                        target |= follow[name]
                    if len(target) != size_before:
                        stable = False
    return follow
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 5. LL(1) check
|
||||||
|
|
||||||
|
def predict_set(prod: list, name: str, first: dict, follow: dict, bnf: dict) -> set:
    """
    Return the lookahead terminals that select production `name → prod`.

    This is FIRST(prod) without ε, extended by FOLLOW(name) when the
    production can derive the empty string.
    """
    firsts = first_of_seq(prod, first, bnf)
    if EPSILON not in firsts:
        return firsts
    return (firsts - {EPSILON}) | follow[name]
|
||||||
|
|
||||||
|
|
||||||
|
def check_ll1(bnf: dict, first: dict, follow: dict) -> list:
    """
    Find every pair of productions of one non-terminal whose PREDICT sets overlap.

    Returns a list of dicts with the keys 'rule', 'prod_i', 'prod_j' and
    'conflict' (the shared lookahead terminals, sorted).  The list is
    empty when no two PREDICT sets intersect.
    """
    conflicts = []
    for name, prods in bnf.items():
        predicts = [predict_set(p, name, first, follow, bnf) for p in prods]
        for i, left in enumerate(predicts):
            # Compare only against later productions so each pair is seen once.
            for j, right in enumerate(predicts[i + 1:], start=i + 1):
                overlap = left & right
                if not overlap:
                    continue
                conflicts.append({
                    'rule': name,
                    'prod_i': prods[i],
                    'prod_j': prods[j],
                    'conflict': sorted(overlap),
                })
    return conflicts
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════ 6. Main
|
||||||
|
|
||||||
|
def _fmt_prod(prod: list) -> str:
    """Render a production as space-separated symbols; the empty production prints as EPSILON."""
    if not prod:
        return EPSILON
    return ' '.join(prod)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Command-line entry point: check one grammar file for the LL(1) property.

    Arguments are read from sys.argv: an optional grammar path (default
    'GRAMMAR.ebnf') and an optional '-v' flag that also prints the FIRST
    and FOLLOW sets of the named rules.

    Returns normally (exit status 0) only when the grammar is LL(1).
    Exits with an error message when the file is missing, cannot be
    parsed, lacks the start symbol or references an undefined
    non-terminal, and exits with status 1 after listing the conflicts when
    the grammar is not LL(1), so scripts can rely on the exit status.
    """
    argv = sys.argv[1:]
    verbose = '-v' in argv
    positional = [a for a in argv if not a.startswith('-')]
    path = Path(positional[0]) if positional else Path('GRAMMAR.ebnf')

    # ── Load & parse ──────────────────────────────────────────────────────
    print(f'Checking {path} …')
    try:
        src = path.read_text(encoding='utf-8')
    except FileNotFoundError:
        sys.exit(f'error: file not found: {path}')

    # Tokenising sits inside the handler too, so a SyntaxError raised while
    # scanning is reported the same way as one raised while parsing.
    try:
        ebnf = _Parser(tokenise(src)).parse_grammar()
    except SyntaxError as exc:
        sys.exit(f'EBNF parse error: {exc}')

    bnf, origins = normalise(ebnf)

    # ── Sanity checks ─────────────────────────────────────────────────────
    # Without the start rule no FOLLOW set ever receives '$', and a
    # misspelt rule name would be treated as a terminal; either way the
    # verdict below could be a false "LL(1)".
    if START not in bnf:
        sys.exit(f'error: start symbol {START!r} is not defined in {path}')
    undefined = sorted({sym
                        for prods in bnf.values()
                        for prod in prods
                        for sym in prod
                        if sym not in bnf and sym[:1].islower()})
    if undefined:
        sys.exit(f'error: undefined non-terminal(s): {", ".join(undefined)}')

    first = compute_first(bnf)
    follow = compute_follow(bnf, first, START)
    errors = check_ll1(bnf, first, follow)

    # ── Summary line ──────────────────────────────────────────────────────
    named = sorted(n for n in bnf if not n.startswith('_'))
    helpers = sorted(n for n in bnf if n.startswith('_'))
    print(f' {len(named)} named rules, {len(helpers)} generated helper rules\n')

    # ── Optional verbose output ───────────────────────────────────────────
    if verbose:
        col = max((len(n) for n in named), default=0) + 2
        print('── FIRST sets (named rules) ──────────────────────────────')
        for n in named:
            syms = sorted(first[n] - {EPSILON})
            nullable = ' [nullable]' if EPSILON in first[n] else ''
            print(f' FIRST({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}{nullable}')
        print()
        print('── FOLLOW sets (named rules) ─────────────────────────────')
        for n in named:
            syms = sorted(follow[n])
            print(f' FOLLOW({n}){"":<{col - len(n)}}= {{ {", ".join(syms)} }}')
        print()

    # ── LL(1) result ──────────────────────────────────────────────────────
    named_err = [e for e in errors if not e['rule'].startswith('_')]
    helper_err = [e for e in errors if e['rule'].startswith('_')]

    if not errors:
        print('✓ Grammar is LL(1) — no conflicts detected.')
        return

    print(f'✗ {len(errors)} conflict(s): '
          f'{len(named_err)} in named rules, '
          f'{len(helper_err)} in generated helpers\n')

    for e in named_err:
        print(f' Rule [{e["rule"]}]')
        print(f' alt A : {_fmt_prod(e["prod_i"])}')
        print(f' alt B : {_fmt_prod(e["prod_j"])}')
        print(f' ambiguous token(s): {e["conflict"]}\n')

    if helper_err:
        print(' Conflicts in generated helpers '
              '(each is linked back to its enclosing named rule):')
        for e in helper_err:
            orig = origins.get(e['rule'], '?')
            print(f' [{e["rule"]}] ← from rule [{orig}]')
            print(f' alt A : {_fmt_prod(e["prod_i"])}')
            print(f' alt B : {_fmt_prod(e["prod_j"])}')
            print(f' ambiguous token(s): {e["conflict"]}\n')

    # A failed check must be visible to callers, not just to readers.
    sys.exit(1)


if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user