Initial Flux language specification
Add the LL(1) context-free grammar (GRAMMAR.ebnf), token and syntax reference (SYNTAX.md), LL(1) verification tool (ll1_check.py), and a Fibonacci example demonstrating the language.
This commit is contained in:
435
GRAMMAR.ebnf
Normal file
435
GRAMMAR.ebnf
Normal file
@@ -0,0 +1,435 @@
|
||||
(* Flux Language Grammar — Context-Free LL(1) Grammar *)
|
||||
(* ================================================================ *)
|
||||
(* *)
|
||||
(* Notation (ISO/IEC 14977 EBNF): *)
|
||||
(* rule = definition ; defines a rule (terminated by ;) *)
|
||||
(* a , b concatenation *)
|
||||
(* a | b alternation *)
|
||||
(* { a } zero or more repetitions of a *)
|
||||
(* [ a ] optional a (zero or one) *)
|
||||
(* ( a | b ) grouping *)
|
||||
(* "literal" terminal string *)
|
||||
(* *)
|
||||
(* UPPERCASE identifiers are lexical token classes whose value *)
|
||||
(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT). *)
|
||||
(* They are NOT defined here — see SYNTAX.md. *)
|
||||
(* *)
|
||||
(* Unique/fixed tokens are written as quoted literals directly. *)
|
||||
(* *)
|
||||
(* Lowercase identifiers are non-terminals (grammar productions). *)
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Program (start symbol) *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* A program is a possibly empty sequence of top-level definitions. *)
program = { top_level_def } ;
|
||||
|
||||
(* A top-level definition is a function ("fn") or a struct ("struct"). *)
top_level_def = func_def | struct_def ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Expressions *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* Root of the full expression hierarchy; struct literals are allowed. *)
(* NOTE(review): none of the expression levels in this file defines *)
(* comparison or equality operators — confirm this is intentional. *)
expr = or_expr ;
|
||||
|
||||
|
||||
(* --- Logical OR (lowest-precedence binary operator) --- *)
|
||||
(* *)
|
||||
(* Uses keyword `or`; left-associative via iteration. *)
|
||||
|
||||
(* Operands are and_expr, so "and" binds tighter than "or". *)
or_expr = and_expr , { "or" , and_expr } ;
|
||||
|
||||
|
||||
(* --- Logical AND --- *)
|
||||
(* *)
|
||||
(* Uses keyword `and`; left-associative via iteration. *)
|
||||
|
||||
(* Operands are bitor_expr, so every bitwise and arithmetic operator *)
(* binds tighter than "and". *)
and_expr = bitor_expr , { "and" , bitor_expr } ;
|
||||
|
||||
|
||||
(* --- Bitwise OR --- *)
|
||||
|
||||
(* Left-associative via iteration; "^" binds tighter than "|". *)
bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
|
||||
|
||||
|
||||
(* --- Bitwise XOR --- *)
|
||||
|
||||
(* Left-associative via iteration; "&" binds tighter than "^". *)
bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
|
||||
|
||||
|
||||
(* --- Bitwise AND --- *)
|
||||
|
||||
(* Left-associative via iteration; "+" and "-" bind tighter than "&". *)
(* A "&" seen after a complete additive_expr is this binary operator; *)
(* a "&" at the start of an operand is the prefix form in unary_expr. *)
bitand_expr = additive_expr , { "&" , additive_expr } ;
|
||||
|
||||
|
||||
(* --- Additive: addition and subtraction --- *)
|
||||
|
||||
(* Left-associative chain of "+" and "-" over multiplicative_expr operands. *)
additive_expr = multiplicative_expr , { ( "+" | "-" ) , multiplicative_expr } ;
|
||||
|
||||
|
||||
(* --- Multiplicative: multiplication, division, modulo --- *)
|
||||
|
||||
(* Left-associative chain of "*", "/" and "%" over unary_expr operands. *)
multiplicative_expr = unary_expr , { ( "*" | "/" | "%" ) , unary_expr } ;
|
||||
|
||||
|
||||
(* --- Unary operators (prefix, right-associative by recursion) --- *)
|
||||
(* *)
|
||||
(* "!" logical not *)
|
||||
(* "~" bitwise not *)
|
||||
(* "-" arithmetic negation *)
|
||||
(* "*" dereference (pointer indirection) *)
|
||||
(* "&" address-of *)
|
||||
|
||||
(* Any number of prefix operators may be stacked in front of a *)
(* postfix_expr; the recursion applies the innermost operator first. *)
unary_expr = ( "!" | "~" | "-" | "*" | "&" ) , unary_expr
           | postfix_expr ;
|
||||
|
||||
|
||||
(* --- Postfix operators (left-associative via iteration) --- *)
|
||||
(* *)
|
||||
(* Postfix operators bind tighter than any prefix or binary form. *)
|
||||
(* Multiple postfix operations chain left-to-right. *)
|
||||
|
||||
(* A primary expression followed by zero or more postfix operations. *)
postfix_expr = primary_expr , { postfix_op } ;
|
||||
|
||||
(* Each alternative starts with its own token, so one token of *)
(* look-ahead selects the operation. *)
postfix_op = "(" , arg_list , ")"   (* call with zero or more arguments *)
           | "[" , expr , "]"       (* index with exactly one expression *)
           | "." , IDENT ;          (* member access by identifier *)
|
||||
|
||||
|
||||
(* --- Primary expressions (highest precedence) --- *)
|
||||
(* *)
|
||||
(* LL(1) note: after IDENT, peek at the next token. *)
|
||||
(* "{" → parse struct_lit_body (struct literal) *)
|
||||
(* other → bare identifier reference *)
|
||||
|
||||
(* Atoms of the full expression grammar. An IDENT immediately followed *)
(* by "{" is a struct literal; otherwise it is a plain identifier. *)
primary_expr = INT_LIT | FLOAT_LIT | STRING_LIT | CHAR_LIT
             | "true" | "false"
             | IDENT , [ struct_lit_body ]
             | "(" , expr , ")" ;
|
||||
|
||||
|
||||
(* --- Struct literal --- *)
|
||||
(* *)
|
||||
(* A struct literal constructs a value of a named struct type. *)
|
||||
(* IDENT "{" field: expr, ... "}" *)
|
||||
(* *)
|
||||
(* Field order need not match the struct definition order. *)
|
||||
(* No trailing comma is permitted (consistent with struct_def). *)
|
||||
(* *)
|
||||
(* LL(1) notes: *)
|
||||
(* struct_field_list: "}" → ε; IDENT → first field *)
|
||||
(* FIRST(struct_field) = {IDENT} *)
|
||||
(* FOLLOW(struct_field_list) = {"}"} *)
|
||||
(* Disjoint, so no look-ahead conflict. *)
|
||||
|
||||
(* Brace-delimited initialisers; an empty pair of braces is accepted *)
(* because struct_field_list may be empty. *)
struct_lit_body = "{" , struct_field_list , "}" ;
|
||||
|
||||
(* Either empty, or one struct_field followed by any number of *)
(* "," struct_field pairs. *)
struct_field_list = [ struct_field , { "," , struct_field } ] ;
|
||||
|
||||
(* One initialiser: name, ":", then a full expr, so nested struct *)
(* literals are allowed as field values. *)
struct_field = IDENT , ":" , expr ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Argument List *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* Zero or more full expressions separated by ","; no trailing comma. *)
(* Used by the call alternative of postfix_op. *)
arg_list = [ expr , { "," , expr } ] ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* No-Struct Expression Hierarchy (expr_ns) *)
|
||||
(* ================================================================ *)
|
||||
(* *)
|
||||
(* Struct literals create an LL(1) ambiguity in if/while conditions:*)
|
||||
(* if Point { x: 1 } { ... } *)
|
||||
(* After "Point", "{" could open a struct literal OR the body block.*)
|
||||
(* *)
|
||||
(* Solution: define expr_ns — identical to expr except *)
|
||||
(* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
|
||||
(* Struct literals ARE still allowed when parenthesised: *)
|
||||
(* if (Point { x: 1 }).flag { ... } *)
|
||||
(* *)
|
||||
(* if_stmt and while_stmt use expr_ns for their condition. *)
|
||||
(* All other expression positions use the full expr. *)
|
||||
|
||||
(* Root of the no-struct hierarchy; referenced by if_stmt and while_stmt. *)
expr_ns = or_expr_ns ;
|
||||
|
||||
(* Binary levels of the no-struct hierarchy. Each rule has the same *)
(* operators and shape as its counterpart without the _ns suffix. *)
or_expr_ns     = and_expr_ns      , { "or"  , and_expr_ns      } ;
and_expr_ns    = bitor_expr_ns    , { "and" , bitor_expr_ns    } ;
bitor_expr_ns  = bitxor_expr_ns   , { "|"   , bitxor_expr_ns   } ;
bitxor_expr_ns = bitand_expr_ns   , { "^"   , bitand_expr_ns   } ;
bitand_expr_ns = additive_expr_ns , { "&"   , additive_expr_ns } ;
|
||||
|
||||
(* No-struct counterpart of additive_expr. *)
additive_expr_ns = multiplicative_expr_ns , { ( "+" | "-" ) , multiplicative_expr_ns } ;
|
||||
|
||||
(* No-struct counterpart of multiplicative_expr. *)
multiplicative_expr_ns = unary_expr_ns , { ( "*" | "/" | "%" ) , unary_expr_ns } ;
|
||||
|
||||
(* No-struct counterpart of unary_expr: the same five prefix operators, *)
(* recursing until a postfix_expr_ns is reached. *)
unary_expr_ns = ( "!" | "~" | "-" | "*" | "&" ) , unary_expr_ns
              | postfix_expr_ns ;
|
||||
|
||||
(* Shares postfix_op with postfix_expr; only the primary differs. *)
postfix_expr_ns = primary_expr_ns , { postfix_op } ;
|
||||
|
||||
(* primary_expr_ns: same as primary_expr but IDENT is never *)
|
||||
(* followed by struct_lit_body. Note "(" , expr , ")" uses full *)
|
||||
(* expr, so struct literals are permitted inside parentheses. *)
|
||||
|
||||
(* Atoms of the no-struct hierarchy. A bare IDENT is never extended *)
(* with struct_lit_body; the parenthesised form switches back to the *)
(* full expr, so struct literals are allowed between "(" and ")". *)
primary_expr_ns = INT_LIT | FLOAT_LIT | STRING_LIT | CHAR_LIT
                | "true" | "false"
                | IDENT
                | "(" , expr , ")" ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Types *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* The first token selects the alternative: a primitive keyword, an *)
(* IDENT, "*" for pointers, or "[" for arrays. *)
type = primitive_type | named_type | pointer_type | array_type ;
|
||||
|
||||
|
||||
(* --- Primitive types --- *)
|
||||
(* *)
|
||||
(* Unsigned integers : u8 u16 u32 u64 *)
|
||||
(* Signed integers : i8 i16 i32 i64 *)
|
||||
(* Floating-point : f32 f64 *)
|
||||
(* Other : bool char *)
|
||||
|
||||
(* The twelve built-in type keywords, grouped by family. *)
primitive_type = "bool" | "char"
               | "f32" | "f64"
               | "i8" | "i16" | "i32" | "i64"
               | "u8" | "u16" | "u32" | "u64" ;
|
||||
|
||||
|
||||
(* --- Named types --- *)
|
||||
(* *)
|
||||
(* A user-defined type referenced by its identifier (e.g. a struct *)
|
||||
(* name). The lexer guarantees that all primitive-type keywords are *)
|
||||
(* reserved, so IDENT never clashes with primitive_type. *)
|
||||
|
||||
(* Any IDENT is accepted here; this rule itself does not check that *)
(* the identifier names a declared type. *)
named_type = IDENT ;
|
||||
|
||||
|
||||
(* --- Pointer types --- *)
|
||||
(* *)
|
||||
(* "*" type — typed pointer; the pointee type is known. *)
|
||||
(* "*opaque" — untyped/opaque pointer (no pointee type info). *)
|
||||
(* *)
|
||||
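(* LL(1) note: after "*", "opaque" is not in FIRST(type) — this *)
(* relies on "opaque" being a reserved word rather than an IDENT, *)
(* since FIRST(type) contains IDENT via named_type — so the two *)
(* alternatives are always distinguishable with one token. *)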
|
||||
|
||||
(* The pointee may be any type, including another pointer or an array, *)
(* because type is recursive. *)
pointer_type = "*" , ( type | "opaque" ) ;
|
||||
|
||||
|
||||
(* --- Array types --- *)
|
||||
(* *)
|
||||
(* "[" type ";" INT_LIT "]" *)
|
||||
(* *)
|
||||
(* The element type and the fixed size (a non-negative integer *)
|
||||
(* literal) are separated by ";". Sizes that are constant *)
|
||||
(* expressions may be introduced in a later grammar revision. *)
|
||||
|
||||
(* The element type may be any type, including a pointer or another *)
(* array; the size must be a single INT_LIT token. *)
array_type = "[" , type , ";" , INT_LIT , "]" ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Statements *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* Every alternative other than expr_stmt begins with its own keyword, *)
(* or with "{" in the case of block_stmt. *)
stmt = let_stmt | return_stmt
     | if_stmt | while_stmt | loop_stmt
     | break_stmt | continue_stmt
     | block_stmt
     | expr_stmt ;
|
||||
|
||||
|
||||
(* --- Return statement --- *)
|
||||
(* *)
|
||||
(* Exits the enclosing function, optionally yielding a value. *)
|
||||
(* "return ;" is used when the function return type is (). *)
|
||||
(* *)
|
||||
(* LL(1): after "return", peek at next token. *)
|
||||
(* ";" → no expression (unit return) *)
|
||||
(* other → parse expr, then expect ";" *)
|
||||
(* ";" is not in FIRST(expr), so the two cases are unambiguous. *)
|
||||
|
||||
(* The terminating ";" is required whether or not a value is given. *)
return_stmt = "return" , [ expr ] , ";" ;
|
||||
|
||||
|
||||
(* --- Expression statement --- *)
|
||||
(* *)
|
||||
(* Evaluates an expression for its side effects; the value is *)
|
||||
(* discarded. The ";" is mandatory. *)
|
||||
(* *)
|
||||
(* LL(1): at stmt level: *)
|
||||
(* "let" → let_stmt *)
|
||||
(* "return" → return_stmt *)
|
||||
(* "if" → if_stmt *)
|
||||
(* "while" → while_stmt *)
|
||||
(* "loop" → loop_stmt *)
|
||||
(* "break" → break_stmt *)
|
||||
(* "continue" → continue_stmt *)
|
||||
(* "{" → block_stmt *)
|
||||
(* other → expr_stmt *)
|
||||
|
||||
(* Uses the full expr, so the statement may start with a struct literal. *)
(* NOTE(review): no rule in this file provides an assignment form, *)
(* although let_stmt and param accept "mut" — confirm whether *)
(* assignment is deferred to a later revision. *)
expr_stmt = expr , ";" ;
|
||||
|
||||
|
||||
(* --- If statement --- *)
|
||||
(* *)
|
||||
(* Conditionally executes a block. An optional "else" branch may *)
|
||||
(* follow; it is either a plain block or another "if" statement, *)
|
||||
(* enabling "else if" chains of arbitrary length. *)
|
||||
(* *)
|
||||
(* LL(1) notes: *)
|
||||
(* condition uses expr_ns — struct literals are forbidden at the *)
|
||||
(* outermost level to avoid ambiguity with the body block's "{". *)
|
||||
(* [ "else" ... ] — consume "else" iff next token is "else" *)
|
||||
(* else_branch: "if" → if_stmt (else-if); "{" → block_stmt *)
|
||||
(* The two else_branch alternatives start with distinct tokens *)
(* ("if" and "{"), so no look-ahead conflict arises. Every branch *)
(* body is a brace-delimited block_stmt, so there is no *)
(* dangling-else ambiguity either. *)
|
||||
|
||||
(* "if", a no-struct condition and a mandatory block; the optional *)
(* "else" part is taken exactly when the next token is "else". *)
if_stmt = "if" , expr_ns , block_stmt ,
          [ "else" , else_branch ] ;
|
||||
|
||||
(* "{" selects a plain else block; "if" continues an else-if chain. *)
else_branch = block_stmt | if_stmt ;
|
||||
|
||||
|
||||
(* --- While loop --- *)
|
||||
(* *)
|
||||
(* Repeatedly executes the body as long as the condition is true. *)
|
||||
(* The condition is re-evaluated before every iteration. *)
|
||||
(* If the condition is false on the first check, the body never *)
|
||||
(* executes. *)
|
||||
(* *)
|
||||
(* Like if_stmt, the condition uses expr_ns to prevent struct *)
|
||||
(* literal ambiguity with the body block's opening "{". *)
|
||||
|
||||
(* Keyword, expr_ns condition, then a mandatory block body. *)
while_stmt = "while" , expr_ns , block_stmt ;
|
||||
|
||||
|
||||
(* --- Infinite loop --- *)
|
||||
(* *)
|
||||
(* Executes the body unconditionally and indefinitely. The only *)
|
||||
(* ways to exit are "break" or "return" inside the body. *)
|
||||
|
||||
(* No condition: the keyword is followed directly by the body block. *)
loop_stmt = "loop" , block_stmt ;
|
||||
|
||||
|
||||
(* --- Break and continue --- *)
|
||||
(* *)
|
||||
(* "break" exits the immediately enclosing "while" or "loop". *)
|
||||
(* "continue" skips the rest of the current iteration and jumps to *)
|
||||
(* the next condition check (while) or iteration (loop). *)
|
||||
(* Both are only valid inside a loop body; the compiler enforces *)
|
||||
(* this as a semantic rule. *)
|
||||
|
||||
(* Takes no label or value: just the keyword and ";". *)
break_stmt = "break" , ";" ;
|
||||
(* Takes no label or value: just the keyword and ";". *)
continue_stmt = "continue" , ";" ;
|
||||
|
||||
|
||||
(* --- Block statement --- *)
|
||||
(* *)
|
||||
(* A block groups zero or more statements into a single statement *)
|
||||
(* and introduces a new lexical scope. It does not produce a value. *)
|
||||
(* *)
|
||||
(* LL(1): at stmt level, "{" unambiguously selects block since no *)
|
||||
(* other stmt alternative starts with "{". *)
|
||||
|
||||
(* An empty pair of braces is a valid block; blocks nest because stmt *)
(* includes block_stmt. *)
block_stmt = "{" , { stmt } , "}" ;
|
||||
|
||||
|
||||
(* --- Let statement --- *)
|
||||
(* *)
|
||||
(* Introduces a named binding in the current scope. *)
|
||||
(* Bindings are immutable by default; "mut" opts into mutability. *)
|
||||
(* *)
|
||||
(* The type annotation and the initialiser are both optional, but *)
|
||||
(* at least one must be present for the binding to be usable; *)
|
||||
(* the compiler enforces this as a semantic (not syntactic) rule. *)
|
||||
(* *)
|
||||
(* LL(1) notes: *)
|
||||
(* [ "mut" ] — consume "mut" iff the next token is "mut" *)
|
||||
(* [ ":" ... ] — consume iff next token is ":" *)
|
||||
(* [ "=" ... ] — consume iff next token is "=" *)
|
||||
(* All decision tokens are distinct, so no look-ahead conflict. *)
|
||||
|
||||
(* Fixed order: "let", optional "mut", name, optional ":" type, *)
(* optional "=" initialiser, then the mandatory ";". *)
let_stmt = "let" , [ "mut" ] , IDENT , [ ":" , type ] , [ "=" , expr ] , ";" ;
|
||||
|
||||
|
||||
(* ================================================================ *)
|
||||
(* Top-Level Definitions *)
|
||||
(* ================================================================ *)
|
||||
|
||||
(* --- Function definition --- *)
|
||||
(* *)
|
||||
(* Defines a named function with a typed parameter list and an *)
|
||||
(* optional return type. Omitting "->" implies a return type of (). *)
|
||||
(* *)
|
||||
(* LL(1) notes: *)
|
||||
(* param_list: ")" → ε (empty list); else parse first param *)
|
||||
(* param: "mut" → consume it; IDENT → "mut" is absent, parse the name *)
|
||||
(* [ "->" ... ]: consume iff next token is "->" *)
|
||||
(* "->" is a single two-character token, distinct from "{" (the *)
(* only token that can start block_stmt), so the optional return *)
(* type never conflicts with the function body that follows. *)
|
||||
|
||||
(* "fn", name, parenthesised parameters, optional "->" return type, *)
(* then the body block. *)
(* NOTE(review): type has no alternative that derives "()", so a unit *)
(* return type can only be expressed by omitting "->" — confirm intended. *)
func_def = "fn" , IDENT ,
           "(" , param_list , ")" , [ "->" , type ] ,
           block_stmt ;
|
||||
|
||||
(* Either empty, or one param followed by any number of "," param *)
(* pairs; no trailing comma. *)
param_list = [ param , { "," , param } ] ;
|
||||
|
||||
(* Each parameter is an optionally-mutable name with a required *)
|
||||
(* type annotation. Mutability applies within the function body. *)
|
||||
|
||||
(* Unlike let_stmt, the ":" type annotation here is not optional. *)
param = [ "mut" ] , IDENT , ":" , type ;
|
||||
|
||||
|
||||
(* --- Struct definition --- *)
|
||||
(* *)
|
||||
(* Defines a named product type with zero or more typed fields. *)
|
||||
(* Fields are separated by commas; no trailing comma is permitted. *)
|
||||
(* *)
|
||||
(* LL(1) notes: *)
|
||||
(* field_list: "}" → ε (empty struct); else parse first field *)
|
||||
(* FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"} *)
|
||||
(* Disjoint, so no look-ahead conflict. *)
|
||||
(* top_level_def: "fn" → func_def; "struct" → struct_def *)
|
||||
|
||||
(* The field list may be empty, so a struct with no fields is valid. *)
struct_def = "struct" , IDENT , "{" , field_list , "}" ;
|
||||
|
||||
(* Either empty, or one field followed by any number of "," field pairs. *)
field_list = [ field , { "," , field } ] ;
|
||||
|
||||
(* Same shape as param, but without the optional "mut". *)
field = IDENT , ":" , type ;
|
||||
Reference in New Issue
Block a user