From c2fc83b74bdfc16e4fe3bc5bdcdc6b57229e21e7 Mon Sep 17 00:00:00 2001 From: Jooris Hadeler Date: Wed, 11 Mar 2026 20:20:59 +0100 Subject: [PATCH] Feat: add LLVM IR backend with opt/llc pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a full LLVM IR text emitter and a four-step build pipeline: 1. Emit LLVM IR (.ll) via alloca-based codegen (mem2reg-friendly) 2. `opt -O2` → optimised IR (override with FLUXC_OPT) 3. `llc -filetype=obj` → object file (override with FLUXC_LLC) 4. `cc` → link into executable (override with FLUXC_CC) (step 4 skipped in -c mode) Emitter supports all Flux types, operators, control flow (if/else, while, loop, break, continue), structs, arrays, pointer operations, function calls, string literals, and integer literal type inference via UnboundInt → concrete-type coercion. Also adds -o CLI flag, exposes CheckResult from the checker (sigma + phi tables reused by codegen), and updates main.rs to run the full parse → check → codegen pipeline. 
Co-Authored-By: Claude Sonnet 4.6 --- fluxc/src/ast.rs | 19 +- fluxc/src/checker/mod.rs | 19 +- fluxc/src/checker/tests.rs | 1 + fluxc/src/cli.rs | 25 +- fluxc/src/codegen/emit.rs | 1418 ++++++++++++++++++++++++++++++++++++ fluxc/src/codegen/mod.rs | 116 +++ fluxc/src/main.rs | 22 +- 7 files changed, 1608 insertions(+), 12 deletions(-) create mode 100644 fluxc/src/codegen/emit.rs create mode 100644 fluxc/src/codegen/mod.rs diff --git a/fluxc/src/ast.rs b/fluxc/src/ast.rs index 676a2ce..19b4324 100644 --- a/fluxc/src/ast.rs +++ b/fluxc/src/ast.rs @@ -42,14 +42,25 @@ pub enum Ty { Char, Unit, // Pointer types - Ptr { mutable: bool, pointee: Box }, - OpaquePtr { mutable: bool }, + Ptr { + mutable: bool, + pointee: Box, + }, + OpaquePtr { + mutable: bool, + }, // Array type - Array { elem: Box, size: u64 }, + Array { + elem: Box, + size: u64, + }, // User-defined struct Struct(String), // Internal function signature (not user-facing) - FnSig { params: Vec, ret: Box }, + FnSig { + params: Vec, + ret: Box, + }, /// Unresolved integer type from a literal or an unannotated let-binding. /// Compatible with every concrete integer type; defaults to `i32` in /// error messages when no concrete type can be inferred. diff --git a/fluxc/src/checker/mod.rs b/fluxc/src/checker/mod.rs index 6f81793..152153c 100644 --- a/fluxc/src/checker/mod.rs +++ b/fluxc/src/checker/mod.rs @@ -10,6 +10,17 @@ use crate::diagnostics::{Diagnostic, Label}; use crate::token::Span; use env::{FieldEntry, FuncTable, ParamEntry, StructTable}; +// ── Check result ─────────────────────────────────────────────────────────────── + +/// The result of running the semantic checker. Carries both the diagnostics and +/// the resolved symbol tables so that downstream passes (e.g. codegen) can +/// reuse them without re-running the checker. 
+pub struct CheckResult { + pub errors: Vec, + pub sigma: StructTable, + pub phi: FuncTable, +} + // ── Checker ──────────────────────────────────────────────────────────────────── pub struct Checker { @@ -166,7 +177,7 @@ fn value_struct_name(ty: &Ty) -> Option<&str> { // ── Entry point ──────────────────────────────────────────────────────────────── -pub fn check(program: &ast::Program, no_main: bool) -> Vec { +pub fn check(program: &ast::Program, no_main: bool) -> CheckResult { let mut checker = Checker::new(); // ── Pass 1: collect struct names + function signatures ──────────────────── @@ -288,5 +299,9 @@ pub fn check(program: &ast::Program, no_main: bool) -> Vec { } } - checker.errors + CheckResult { + errors: checker.errors, + sigma: checker.sigma, + phi: checker.phi, + } } diff --git a/fluxc/src/checker/tests.rs b/fluxc/src/checker/tests.rs index 8e010e7..fb52169 100644 --- a/fluxc/src/checker/tests.rs +++ b/fluxc/src/checker/tests.rs @@ -20,6 +20,7 @@ mod tests { parser.errors ); checker::check(&program, false) + .errors .into_iter() .map(|d| d.message) .collect() diff --git a/fluxc/src/cli.rs b/fluxc/src/cli.rs index 3de2755..da9f8cf 100644 --- a/fluxc/src/cli.rs +++ b/fluxc/src/cli.rs @@ -24,9 +24,14 @@ pub fn print_help() { "--version".bold(), ); println!( - " {} Compile without requiring a `main` entry point", + " {} Compile to object file (no `main` required, no linking)", "-c".bold(), ); + println!( + " {} {} Write output to ", + "-o".bold(), + "".bold(), + ); println!(); println!("{}", "ARGS:".bold().yellow()); println!( @@ -61,15 +66,19 @@ pub fn io_error(path: &str, err: std::io::Error) -> ! { pub struct Opts { pub files: Vec, - /// `-c`: compile without requiring a `main` entry point. + /// `-c`: compile to object file without requiring a `main` entry point. pub no_main: bool, + /// `-o `: write final output to this path. 
+ pub output: Option, } pub fn parse_args() -> Opts { let mut files = Vec::new(); let mut no_main = false; + let mut output: Option = None; + let mut args = std::env::args().skip(1).peekable(); - for arg in std::env::args().skip(1) { + while let Some(arg) = args.next() { match arg.as_str() { "-h" | "--help" => { print_help(); @@ -80,6 +89,10 @@ pub fn parse_args() -> Opts { process::exit(0); } "-c" => no_main = true, + "-o" => match args.next() { + Some(path) => output = Some(path), + None => fatal("option `-o` requires an argument"), + }, flag if flag.starts_with('-') => { fatal(&format!("unknown option `{flag}`")); } @@ -91,5 +104,9 @@ pub fn parse_args() -> Opts { fatal("no input files — at least one source file is required"); } - Opts { files, no_main } + Opts { + files, + no_main, + output, + } } diff --git a/fluxc/src/codegen/emit.rs b/fluxc/src/codegen/emit.rs new file mode 100644 index 0000000..302025d --- /dev/null +++ b/fluxc/src/codegen/emit.rs @@ -0,0 +1,1418 @@ +//! LLVM IR text emitter for Flux. +//! +//! Uses an alloca-based approach (every local lives in an `alloca`; `mem2reg` +//! inside `opt -O2` promotes scalars to SSA registers). This keeps the +//! emitter simple: no phi nodes, no dominance analysis. +//! +//! Pointer types follow LLVM 15+ opaque-pointer conventions — every pointer is +//! `ptr` regardless of its pointee type. 
+ +use std::collections::HashMap; +use std::fmt::Write as _; + +use crate::ast::{ + self, BinaryOp, Block, CompoundAssignOp, Expr, ExprKind, FuncDef, Parsed, Program, StmtKind, + Ty, UnaryOp, +}; +use crate::checker::env::{FuncTable, StructTable}; + +// ── Public entry point ───────────────────────────────────────────────────────── + +pub fn emit_program(program: &Program, sigma: &StructTable, phi: &FuncTable) -> String { + let mut e = Emitter::new(sigma, phi); + e.emit_module(program); + e.finish() +} + +// ── LLVM type helpers ────────────────────────────────────────────────────────── + +fn llvm_ty(ty: &Ty) -> String { + match ty { + Ty::U8 | Ty::I8 => "i8".into(), + Ty::U16 | Ty::I16 => "i16".into(), + Ty::U32 | Ty::I32 => "i32".into(), + Ty::U64 | Ty::I64 => "i64".into(), + Ty::F32 => "float".into(), + Ty::F64 => "double".into(), + Ty::Bool => "i1".into(), + Ty::Char => "i32".into(), + Ty::Unit => "void".into(), + Ty::Ptr { .. } | Ty::OpaquePtr { .. } => "ptr".into(), + Ty::Array { elem, size } => format!("[{} x {}]", size, llvm_ty(elem)), + Ty::Struct(name) => format!("%{name}"), + Ty::UnboundInt => "i32".into(), + Ty::FnSig { .. } | Ty::Error => "i32".into(), + } +} + +/// Bit-width of an integer or integer-like type (char = 32). +fn int_bits(ty: &Ty) -> u32 { + match ty { + Ty::U8 | Ty::I8 => 8, + Ty::U16 | Ty::I16 => 16, + Ty::U32 | Ty::I32 | Ty::Char | Ty::UnboundInt => 32, + Ty::U64 | Ty::I64 => 64, + _ => 0, + } +} + +fn is_signed(ty: &Ty) -> bool { + matches!(ty, Ty::I8 | Ty::I16 | Ty::I32 | Ty::I64 | Ty::UnboundInt) +} + +fn is_aggregate(ty: &Ty) -> bool { + matches!(ty, Ty::Struct(_) | Ty::Array { .. }) +} + +/// Resolve `UnboundInt` to a concrete type using an optional hint; default `i32`. 
+fn resolve(ty: &Ty, hint: Option<&Ty>) -> Ty { + if matches!(ty, Ty::UnboundInt) { + if let Some(h) = hint { + if h.is_integer() && !matches!(h, Ty::UnboundInt) { + return h.clone(); + } + } + return Ty::I32; + } + ty.clone() +} + +/// Convert a Flux escape sequence string (content between the quotes) to its +/// LLVM IR `c"..."` representation, appending a NUL terminator. +fn flux_str_to_llvm(s: &str) -> (String, usize) { + let mut out = String::new(); + let mut chars = s.chars().peekable(); + let mut byte_len = 0usize; + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('n') => { + out.push_str("\\0A"); + byte_len += 1; + } + Some('t') => { + out.push_str("\\09"); + byte_len += 1; + } + Some('r') => { + out.push_str("\\0D"); + byte_len += 1; + } + Some('0') => { + out.push_str("\\00"); + byte_len += 1; + } + Some('\\') => { + out.push_str("\\5C"); + byte_len += 1; + } + Some('"') => { + out.push_str("\\22"); + byte_len += 1; + } + Some('\'') => { + out.push_str("\\27"); + byte_len += 1; + } + Some('x') => { + // \xNN + let h1 = chars.next().unwrap_or('0'); + let h2 = chars.next().unwrap_or('0'); + write!(out, "\\{h1}{h2}").unwrap(); + byte_len += 1; + } + Some(other) => { + out.push(other); + byte_len += other.len_utf8(); + } + None => {} + } + } else if c.is_ascii() && !c.is_ascii_control() && c != '"' && c != '\\' { + out.push(c); + byte_len += 1; + } else { + // Non-ASCII or control char → hex escape + let mut buf = [0u8; 4]; + for b in c.encode_utf8(&mut buf).bytes() { + write!(out, "\\{b:02X}").unwrap(); + byte_len += 1; + } + } + } + // NUL terminator + out.push_str("\\00"); + byte_len += 1; + (out, byte_len) +} + +/// Parse a Flux char literal (content between quotes) to its Unicode scalar value. 
+fn parse_char_lit(s: &str) -> u32 { + let mut chars = s.chars(); + match chars.next() { + Some('\\') => match chars.next() { + Some('n') => 10, + Some('t') => 9, + Some('r') => 13, + Some('0') => 0, + Some('\\') => 92, + Some('\'') => 39, + Some('"') => 34, + Some('x') => { + let h: String = chars.take(2).collect(); + u32::from_str_radix(&h, 16).unwrap_or(0) + } + Some(c) => c as u32, + None => 0, + }, + Some(c) => c as u32, + None => 0, + } +} + +/// Strip `_` separators from integer literals (e.g. `1_000` → `1000`). +fn clean_int_lit(s: &str) -> String { + s.replace('_', "") +} + +// ── Emitter ──────────────────────────────────────────────────────────────────── + +/// Local variable slot: the alloca pointer name and the declared Flux type. +#[derive(Clone)] +struct LocalVar { + alloca: String, + ty: Ty, +} + +struct Emitter<'a> { + sigma: &'a StructTable, + phi: &'a FuncTable, + + // Module-level output sections + type_decls: String, // struct type declarations + str_globals: String, // string literal globals + func_defs: String, // function definitions + + // String literal dedup table + str_map: HashMap, // content → global name + str_cnt: u32, + + // Per-function state (reset in emit_function) + alloca_sec: String, // alloca instructions (entry block) + body: String, // other instructions + locals: Vec<(String, LocalVar)>, // (name, var); stack, rightmost wins + tmp: u32, // next %t.N counter + lbl: u32, // next label counter + terminated: bool, // current basic block already has a terminator + ret_ty: Ty, // current function's return type + break_lbl: Option, // target for `break` + continue_lbl: Option, // target for `continue` +} + +impl<'a> Emitter<'a> { + fn new(sigma: &'a StructTable, phi: &'a FuncTable) -> Self { + Self { + sigma, + phi, + type_decls: String::new(), + str_globals: String::new(), + func_defs: String::new(), + str_map: HashMap::new(), + str_cnt: 0, + alloca_sec: String::new(), + body: String::new(), + locals: Vec::new(), + tmp: 0, + 
lbl: 0, + terminated: false, + ret_ty: Ty::Unit, + break_lbl: None, + continue_lbl: None, + } + } + + fn finish(self) -> String { + format!( + "{}\n{}\n{}", + self.type_decls, self.str_globals, self.func_defs + ) + } + + // ── Counters ────────────────────────────────────────────────────────────── + + fn next_tmp(&mut self) -> String { + let n = self.tmp; + self.tmp += 1; + format!("%t.{n}") + } + + fn next_lbl_pair(&mut self, prefix: &str) -> String { + let n = self.lbl; + self.lbl += 1; + format!("{prefix}.{n}") + } + + // ── Low-level emission ──────────────────────────────────────────────────── + + fn w(&mut self, line: &str) { + if !self.terminated { + writeln!(self.body, " {line}").unwrap(); + } + } + + fn emit_label(&mut self, lbl: &str) { + writeln!(self.body, "{lbl}:").unwrap(); + self.terminated = false; + } + + fn emit_br(&mut self, target: &str) { + if !self.terminated { + writeln!(self.body, " br label %{target}").unwrap(); + self.terminated = true; + } + } + + fn emit_cond_br(&mut self, cond: &str, t: &str, f: &str) { + if !self.terminated { + writeln!(self.body, " br i1 {cond}, label %{t}, label %{f}").unwrap(); + self.terminated = true; + } + } + + fn emit_alloca(&mut self, alloca_name: &str, ty: &Ty) { + writeln!(self.alloca_sec, " {alloca_name} = alloca {}", llvm_ty(ty)).unwrap(); + } + + // ── Locals ──────────────────────────────────────────────────────────────── + + fn push_local(&mut self, name: String, alloca: String, ty: Ty) { + self.locals.push((name, LocalVar { alloca, ty })); + } + + fn lookup_local(&self, name: &str) -> Option<&LocalVar> { + self.locals + .iter() + .rev() + .find(|(n, _)| n == name) + .map(|(_, v)| v) + } + + fn save_locals(&self) -> usize { + self.locals.len() + } + + fn restore_locals(&mut self, saved: usize) { + self.locals.truncate(saved); + } + + // ── String literal helpers ──────────────────────────────────────────────── + + fn intern_string(&mut self, content: &str) -> String { + if let Some(name) = 
self.str_map.get(content) { + return name.clone(); + } + let name = format!("@.str.{}", self.str_cnt); + self.str_cnt += 1; + let (llvm_content, byte_len) = flux_str_to_llvm(content); + writeln!( + self.str_globals, + "{name} = private unnamed_addr constant [{byte_len} x i8] c\"{llvm_content}\"" + ) + .unwrap(); + self.str_map.insert(content.to_string(), name.clone()); + name + } + + // ── Type inference (post-typecheck, errors already caught) ──────────────── + + /// Infer the Flux type of `expr` without emitting any code. + /// Uses `hint` to resolve `UnboundInt`; defaults to `i32`. + fn typeof_expr(&self, expr: &Expr, hint: Option<&Ty>) -> Ty { + match &expr.kind { + ExprKind::IntLit(_) => resolve(&Ty::UnboundInt, hint), + ExprKind::FloatLit(_) => Ty::F64, + ExprKind::StringLit(_) => Ty::Ptr { + mutable: false, + pointee: Box::new(Ty::Char), + }, + ExprKind::CharLit(_) => Ty::Char, + ExprKind::Bool(_) => Ty::Bool, + ExprKind::Ident(name) => self + .lookup_local(name) + .map(|v| v.ty.clone()) + .unwrap_or(Ty::Error), + ExprKind::Unary { + op, expr: inner, .. + } => { + let inner_ty = self.typeof_expr(inner, None); + match op { + UnaryOp::Neg | UnaryOp::BitNot => resolve(&inner_ty, hint), + UnaryOp::Not => Ty::Bool, + UnaryOp::Deref => match inner_ty { + Ty::Ptr { pointee, .. } => *pointee, + _ => Ty::Error, + }, + UnaryOp::AddrOf => Ty::Ptr { + mutable: true, + pointee: Box::new(inner_ty), + }, + } + } + ExprKind::Binary { op, lhs, rhs, .. } => match op { + BinaryOp::Assign => self.typeof_expr(lhs, None), + BinaryOp::Eq + | BinaryOp::Ne + | BinaryOp::Lt + | BinaryOp::Gt + | BinaryOp::Le + | BinaryOp::Ge + | BinaryOp::Or + | BinaryOp::And => Ty::Bool, + BinaryOp::Shl | BinaryOp::Shr => resolve(&self.typeof_expr(lhs, hint), hint), + _ => { + let lt = self.typeof_expr(lhs, hint); + let rt = self.typeof_expr(rhs, hint); + Ty::common(<, &rt).unwrap_or_else(|| resolve(<, hint)) + } + }, + ExprKind::CompoundAssign { lhs, .. 
} => self.typeof_expr(lhs, None), + ExprKind::Call { callee, .. } => { + if let ExprKind::Ident(name) = &callee.kind { + self.phi + .get(name) + .map(|e| e.ret.clone()) + .unwrap_or(Ty::Error) + } else { + Ty::Error + } + } + ExprKind::Field { + expr: base, field, .. + } => { + let base_ty = self.typeof_place_or_load(base); + if let Ty::Struct(sname) = base_ty { + self.sigma + .field_ty(&sname, field) + .cloned() + .unwrap_or(Ty::Error) + } else { + Ty::Error + } + } + ExprKind::Index { expr: base, .. } => match self.typeof_expr(base, None) { + Ty::Array { elem, .. } => *elem, + Ty::Ptr { pointee, .. } => *pointee, + _ => Ty::Error, + }, + ExprKind::StructLit { name, .. } => Ty::Struct(name.clone()), + ExprKind::Group(inner) => self.typeof_expr(inner, hint), + ExprKind::Error => Ty::Error, + } + } + + /// Like `typeof_expr` but for a place expression: strips one layer of + /// pointer if the base is accessed through a pointer. + fn typeof_place_or_load(&self, expr: &Expr) -> Ty { + let t = self.typeof_expr(expr, None); + match t { + Ty::Ptr { pointee, .. } => *pointee, + other => other, + } + } + + // ── Coercion emission ───────────────────────────────────────────────────── + + /// Emit zero/sign-extension or float promotion if needed. + /// Returns the (possibly new) SSA name holding the coerced value. + fn coerce(&mut self, val: &str, from: &Ty, to: &Ty) -> String { + if from == to { + return val.to_string(); + } + // UnboundInt: literal was already emitted at the right bit-width by the + // type system — just return as-is (LLVM infers width from context). + if matches!(from, Ty::UnboundInt) || matches!(to, Ty::UnboundInt) { + return val.to_string(); + } + // *mut T → *T: both are `ptr` in opaque-pointer mode + match (from, to) { + (Ty::Ptr { .. }, Ty::Ptr { .. }) | (Ty::OpaquePtr { .. }, Ty::Ptr { .. 
}) => { + return val.to_string(); + } + _ => {} + } + if from.is_integer() && to.is_integer() { + let fb = int_bits(from); + let tb = int_bits(to); + if fb == tb { + return val.to_string(); + } + let tmp = self.next_tmp(); + if fb < tb { + let op = if is_signed(from) { "sext" } else { "zext" }; + self.w(&format!( + "{tmp} = {op} {} {val} to {}", + llvm_ty(from), + llvm_ty(to) + )); + } else { + // truncation (shouldn't happen in well-typed code) + self.w(&format!( + "{tmp} = trunc {} {val} to {}", + llvm_ty(from), + llvm_ty(to) + )); + } + return tmp; + } + if matches!((from, to), (Ty::F32, Ty::F64)) { + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = fpext float {val} to double")); + return tmp; + } + val.to_string() + } + + // ── Expression emission ─────────────────────────────────────────────────── + + /// Emit instructions for `expr` and return `(ssa_value, ty)`. + /// + /// For scalar types the SSA value is the actual data value. + /// For aggregate types (struct, array) the SSA value is a pointer to an + /// alloca holding the data (caller must not load it again). 
+ fn emit_expr(&mut self, expr: &Expr, hint: Option<&Ty>) -> (String, Ty) { + match &expr.kind { + // ── Literals ─────────────────────────────────────────────────────── + ExprKind::IntLit(s) => { + let ty = resolve(&Ty::UnboundInt, hint); + (clean_int_lit(s), ty) + } + ExprKind::FloatLit(s) => (s.clone(), Ty::F64), + ExprKind::Bool(b) => (if *b { "1" } else { "0" }.into(), Ty::Bool), + ExprKind::CharLit(s) => (parse_char_lit(s).to_string(), Ty::Char), + + ExprKind::StringLit(s) => { + let global = self.intern_string(s); + let tmp = self.next_tmp(); + let (_, byte_len) = flux_str_to_llvm(s); + self.w(&format!( + "{tmp} = getelementptr [{byte_len} x i8], ptr {global}, i64 0, i64 0" + )); + let ty = Ty::Ptr { + mutable: false, + pointee: Box::new(Ty::Char), + }; + (tmp, ty) + } + + // ── Variable ─────────────────────────────────────────────────────── + ExprKind::Ident(name) => { + let v = self + .lookup_local(name) + .expect("undefined var in codegen") + .clone(); + if is_aggregate(&v.ty) { + // Return the alloca pointer directly; caller uses GEP / stores + (v.alloca.clone(), v.ty.clone()) + } else { + let tmp = self.next_tmp(); + self.w(&format!( + "{tmp} = load {}, ptr {}", + llvm_ty(&v.ty), + v.alloca + )); + (tmp, v.ty.clone()) + } + } + + // ── Grouped ──────────────────────────────────────────────────────── + ExprKind::Group(inner) => self.emit_expr(inner, hint), + + // ── Unary ────────────────────────────────────────────────────────── + ExprKind::Unary { + op, expr: inner, .. + } => { + match op { + UnaryOp::AddrOf => { + // Return the alloca/GEP pointer of the inner place + let (ptr, inner_ty) = self.emit_place(inner); + let ty = Ty::Ptr { + mutable: true, + pointee: Box::new(inner_ty), + }; + (ptr, ty) + } + UnaryOp::Deref => { + let (ptr, ptr_ty) = self.emit_expr(inner, None); + let pointee_ty = match &ptr_ty { + Ty::Ptr { pointee, .. 
} => *pointee.clone(), + _ => Ty::Error, + }; + if is_aggregate(&pointee_ty) { + // Aggregate behind pointer: return the pointer as the aggregate value + (ptr, pointee_ty) + } else { + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = load {}, ptr {ptr}", llvm_ty(&pointee_ty))); + (tmp, pointee_ty) + } + } + UnaryOp::Neg => { + let inner_hint = hint; + let (val, ty) = self.emit_expr(inner, inner_hint); + let ty = resolve(&ty, hint); + let tmp = self.next_tmp(); + if ty.is_float() { + self.w(&format!("{tmp} = fneg {} {val}", llvm_ty(&ty))); + } else { + self.w(&format!("{tmp} = sub {} 0, {val}", llvm_ty(&ty))); + } + (tmp, ty) + } + UnaryOp::Not => { + let (val, _) = self.emit_expr(inner, Some(&Ty::Bool)); + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = xor i1 {val}, 1")); + (tmp, Ty::Bool) + } + UnaryOp::BitNot => { + let (val, ty) = self.emit_expr(inner, hint); + let ty = resolve(&ty, hint); + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = xor {} {val}, -1", llvm_ty(&ty))); + (tmp, ty) + } + } + } + + // ── Binary ───────────────────────────────────────────────────────── + ExprKind::Binary { + op, + lhs, + rhs, + op_span: _, + } => self.emit_binary(op, lhs, rhs, hint), + + // ── Compound assignment ──────────────────────────────────────────── + ExprKind::CompoundAssign { op, lhs, rhs, .. } => { + self.emit_compound_assign(op, lhs, rhs) + } + + // ── Field access ─────────────────────────────────────────────────── + ExprKind::Field { + expr: base, field, .. 
+ } => { + let field_ty = self.typeof_expr(expr, hint); + let (fptr, _) = self.emit_field_ptr(base, field); + if is_aggregate(&field_ty) { + (fptr, field_ty) + } else { + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = load {}, ptr {fptr}", llvm_ty(&field_ty))); + (tmp, field_ty) + } + } + + // ── Array / pointer index ────────────────────────────────────────── + ExprKind::Index { expr: base, index } => { + let elem_ty = self.typeof_expr(expr, hint); + let (eptr, _) = self.emit_index_ptr(base, index); + if is_aggregate(&elem_ty) { + (eptr, elem_ty) + } else { + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = load {}, ptr {eptr}", llvm_ty(&elem_ty))); + (tmp, elem_ty) + } + } + + // ── Function call ────────────────────────────────────────────────── + ExprKind::Call { callee, args } => self.emit_call(callee, args, hint), + + // ── Struct literal ───────────────────────────────────────────────── + ExprKind::StructLit { name, fields, .. } => self.emit_struct_lit(name, fields), + + ExprKind::Error => ("0".into(), Ty::Error), + } + } + + // ── Binary operator emission ─────────────────────────────────────────────── + + fn emit_binary( + &mut self, + op: &BinaryOp, + lhs: &Expr, + rhs: &Expr, + hint: Option<&Ty>, + ) -> (String, Ty) { + if *op == BinaryOp::Assign { + return self.emit_assign_expr(lhs, rhs); + } + + // Comparison operators always return bool + let is_cmp = matches!( + op, + BinaryOp::Eq | BinaryOp::Ne | BinaryOp::Lt | BinaryOp::Gt | BinaryOp::Le | BinaryOp::Ge + ); + let is_logical = matches!(op, BinaryOp::Or | BinaryOp::And); + + if is_logical { + let (lv, _) = self.emit_expr(lhs, Some(&Ty::Bool)); + let (rv, _) = self.emit_expr(rhs, Some(&Ty::Bool)); + let tmp = self.next_tmp(); + let instr = if *op == BinaryOp::Or { "or" } else { "and" }; + self.w(&format!("{tmp} = {instr} i1 {lv}, {rv}")); + return (tmp, Ty::Bool); + } + + // For shifts, RHS can be any integer; LHS type determines result type + if matches!(op, BinaryOp::Shl | BinaryOp::Shr) 
{ + let lhs_ty_raw = self.typeof_expr(lhs, hint); + let lhs_ty = resolve(&lhs_ty_raw, hint); + let (lv, lv_ty) = self.emit_expr(lhs, Some(&lhs_ty)); + let lv = self.coerce(&lv, &lv_ty, &lhs_ty); + let (rv, rv_ty) = self.emit_expr(rhs, None); + // Shift amount must match LHS bit width + let rv = self.coerce_to_same_width(&rv, &rv_ty, &lhs_ty); + let tmp = self.next_tmp(); + let instr = match op { + BinaryOp::Shl => "shl", + BinaryOp::Shr => { + if is_signed(&lhs_ty) { + "ashr" + } else { + "lshr" + } + } + _ => unreachable!(), + }; + self.w(&format!("{tmp} = {instr} {} {lv}, {rv}", llvm_ty(&lhs_ty))); + return (tmp, lhs_ty); + } + + // Determine common type for arithmetic/bitwise + let lhs_ty_raw = self.typeof_expr(lhs, hint); + let rhs_ty_raw = self.typeof_expr(rhs, hint); + let common = + Ty::common(&lhs_ty_raw, &rhs_ty_raw).unwrap_or_else(|| resolve(&lhs_ty_raw, hint)); + let common = resolve(&common, hint); + + let (lv, lv_ty) = self.emit_expr(lhs, Some(&common)); + let lv = self.coerce(&lv, &lv_ty, &common); + let (rv, rv_ty) = self.emit_expr(rhs, Some(&common)); + let rv = self.coerce(&rv, &rv_ty, &common); + + if is_cmp { + let tmp = self.next_tmp(); + let pred = cmp_pred(op, &common); + self.w(&format!("{tmp} = {pred} {} {lv}, {rv}", llvm_ty(&common))); + return (tmp, Ty::Bool); + } + + let tmp = self.next_tmp(); + let instr = arith_instr(op, &common); + self.w(&format!("{tmp} = {instr} {} {lv}, {rv}", llvm_ty(&common))); + (tmp, common) + } + + /// Coerce `val` to the same bit-width as `target` (for shift amounts). 
+ fn coerce_to_same_width(&mut self, val: &str, from: &Ty, target: &Ty) -> String { + let fb = int_bits(from); + let tb = int_bits(target); + if fb == tb || fb == 0 || tb == 0 { + return val.to_string(); + } + let tmp = self.next_tmp(); + let target_lty = llvm_ty(target); + if fb < tb { + let op = if is_signed(from) { "sext" } else { "zext" }; + self.w(&format!( + "{tmp} = {op} {} {val} to {target_lty}", + llvm_ty(from) + )); + } else { + self.w(&format!( + "{tmp} = trunc {} {val} to {target_lty}", + llvm_ty(from) + )); + } + tmp + } + + fn emit_assign_expr(&mut self, lhs: &Expr, rhs: &Expr) -> (String, Ty) { + let lhs_ty = self.typeof_expr(lhs, None); + let (rv, rv_ty) = self.emit_expr(rhs, Some(&lhs_ty)); + let rv = self.coerce(&rv, &rv_ty, &lhs_ty); + self.emit_store_to_place(lhs, &rv, &lhs_ty); + (rv, lhs_ty) + } + + fn emit_compound_assign( + &mut self, + op: &CompoundAssignOp, + lhs: &Expr, + rhs: &Expr, + ) -> (String, Ty) { + let lhs_ty = resolve(&self.typeof_expr(lhs, None), None); + let (lv, _) = self.emit_expr(lhs, Some(&lhs_ty)); + let (rv, rv_ty) = self.emit_expr(rhs, Some(&lhs_ty)); + let rv = self.coerce(&rv, &rv_ty, &lhs_ty); + let tmp = self.next_tmp(); + let instr = compound_instr(op, &lhs_ty); + self.w(&format!("{tmp} = {instr} {} {lv}, {rv}", llvm_ty(&lhs_ty))); + self.emit_store_to_place(lhs, &tmp, &lhs_ty); + (tmp, lhs_ty) + } + + // ── Place helpers ───────────────────────────────────────────────────────── + + /// Emit instructions to get a pointer to the storage of a place expression. + /// Returns `(ptr_name, pointee_ty)`. + fn emit_place(&mut self, expr: &Expr) -> (String, Ty) { + match &expr.kind { + ExprKind::Ident(name) => { + let v = self + .lookup_local(name) + .expect("undefined var in emit_place") + .clone(); + (v.alloca.clone(), v.ty.clone()) + } + ExprKind::Unary { + op: UnaryOp::Deref, + expr: inner, + .. + } => { + let (ptr, ptr_ty) = self.emit_expr(inner, None); + let pointee = match ptr_ty { + Ty::Ptr { pointee, .. 
} => *pointee, + _ => Ty::Error, + }; + (ptr, pointee) + } + ExprKind::Field { + expr: base, field, .. + } => self.emit_field_ptr(base, field), + ExprKind::Index { expr: base, index } => self.emit_index_ptr(base, index), + ExprKind::Group(inner) => self.emit_place(inner), + _ => { + // Non-place: emit to a temp alloca (rare; covers error cases) + let (val, ty) = self.emit_expr(expr, None); + let tmp = self.next_tmp(); + let alloca = format!("{tmp}.place"); + self.emit_alloca(&alloca, &ty); + self.w(&format!("store {} {val}, ptr {alloca}", llvm_ty(&ty))); + (alloca, ty) + } + } + } + + /// Store `val` (of type `ty`) into the place expression. + fn emit_store_to_place(&mut self, place: &Expr, val: &str, ty: &Ty) { + let (ptr, _) = self.emit_place(place); + self.w(&format!("store {} {val}, ptr {ptr}", llvm_ty(ty))); + } + + /// Returns `(field_ptr, field_ty)`. + fn emit_field_ptr(&mut self, base: &Expr, field: &str) -> (String, Ty) { + let (struct_ptr, struct_ty) = self.emit_struct_ptr(base); + let struct_name = match &struct_ty { + Ty::Struct(n) => n.clone(), + _ => panic!("field access on non-struct"), + }; + + let fields = self.sigma.fields(&struct_name).unwrap_or(&[]); + let idx = fields + .iter() + .position(|f| f.name == field) + .expect("unknown field in codegen"); + let field_ty = fields[idx].ty.clone(); + + let tmp = self.next_tmp(); + self.w(&format!( + "{tmp} = getelementptr inbounds %{struct_name}, ptr {struct_ptr}, i32 0, i32 {idx}" + )); + (tmp, field_ty) + } + + /// Returns `(elem_ptr, elem_ty)`. 
+ fn emit_index_ptr(&mut self, base: &Expr, index: &Expr) -> (String, Ty) { + let base_ty = self.typeof_expr(base, None); + let (idx_val, idx_ty) = self.emit_expr(index, Some(&Ty::I64)); + let idx_val = self.coerce(&idx_val, &idx_ty, &Ty::I64); + + match base_ty { + Ty::Array { elem, size } => { + let (arr_ptr, _) = self.emit_place(base); + let arr_lty = llvm_ty(&Ty::Array { + elem: elem.clone(), + size, + }); + let tmp = self.next_tmp(); + self.w(&format!( + "{tmp} = getelementptr inbounds {arr_lty}, ptr {arr_ptr}, i64 0, i64 {idx_val}" + )); + (tmp, *elem) + } + Ty::Ptr { pointee, .. } => { + let (ptr_val, _) = self.emit_expr(base, None); + let tmp = self.next_tmp(); + self.w(&format!( + "{tmp} = getelementptr inbounds {}, ptr {ptr_val}, i64 {idx_val}", + llvm_ty(&pointee) + )); + (tmp, *pointee) + } + _ => panic!("index on non-array/ptr type"), + } + } + + /// Get a pointer to the struct storage for GEP, returning `(ptr, struct_ty)`. + /// + /// - Direct struct variable: returns its alloca. + /// - Pointer-to-struct: returns the pointer value (points to the struct). + fn emit_struct_ptr(&mut self, base: &Expr) -> (String, Ty) { + let base_ty = self.typeof_expr(base, None); + match base_ty { + Ty::Ptr { pointee, .. 
} if matches!(*pointee, Ty::Struct(_)) => { + let (ptr_val, _) = self.emit_expr(base, None); + (ptr_val, *pointee) + } + ty @ Ty::Struct(_) => { + let (alloca, _) = self.emit_place(base); + (alloca, ty) + } + _ => panic!("field access on non-struct type"), + } + } + + // ── Function call ────────────────────────────────────────────────────────── + + fn emit_call( + &mut self, + callee: &Expr, + args: &[Expr], + _hint: Option<&Ty>, + ) -> (String, Ty) { + let func_name = match &callee.kind { + ExprKind::Ident(n) => n.clone(), + _ => panic!("indirect calls not yet supported"), + }; + let entry = self + .phi + .get(&func_name) + .expect("unknown function in codegen"); + let param_tys: Vec = entry.params.iter().map(|p| p.ty.clone()).collect(); + let ret_ty = entry.ret.clone(); + + let mut arg_vals = Vec::new(); + for (arg, param_ty) in args.iter().zip(param_tys.iter()) { + let (v, vty) = self.emit_expr(arg, Some(param_ty)); + let v = self.coerce(&v, &vty, param_ty); + arg_vals.push(format!("{} {v}", llvm_ty(param_ty))); + } + + let arg_str = arg_vals.join(", "); + + if ret_ty == Ty::Unit { + self.w(&format!("call void @{func_name}({arg_str})")); + ("0".into(), Ty::Unit) + } else { + let tmp = self.next_tmp(); + self.w(&format!( + "{tmp} = call {} @{func_name}({arg_str})", + llvm_ty(&ret_ty) + )); + (tmp, ret_ty) + } + } + + // ── Struct literal ───────────────────────────────────────────────────────── + + fn emit_struct_lit(&mut self, name: &str, fields: &[ast::StructField]) -> (String, Ty) { + let tmp = self.next_tmp(); + let alloca = format!("{tmp}.agg"); + self.emit_alloca(&alloca, &Ty::Struct(name.to_string())); + + // Build a map from field name → value for the literal + let field_vals: HashMap = { + let mut map = HashMap::new(); + for sf in fields { + // We need to know the declared field type for type hints + let decl_ty = self.sigma.field_ty(name, &sf.name).cloned(); + let (v, vty) = self.emit_expr(&sf.value, decl_ty.as_ref()); + let target_ty = 
decl_ty.unwrap_or_else(|| vty.clone()); + let v = self.coerce(&v, &vty, &target_ty); + map.insert(sf.name.clone(), (v, target_ty)); + } + map + }; + + // Emit stores in declaration order (GEP by index) + let decl_fields: Vec<(String, Ty)> = self + .sigma + .fields(name) + .unwrap_or(&[]) + .iter() + .map(|f| (f.name.clone(), f.ty.clone())) + .collect(); + + for (idx, (fname, fty)) in decl_fields.iter().enumerate() { + if let Some((val, _)) = field_vals.get(fname) { + let fptr = self.next_tmp(); + self.w(&format!( + "{fptr} = getelementptr inbounds %{name}, ptr {alloca}, i32 0, i32 {idx}" + )); + self.w(&format!("store {} {val}, ptr {fptr}", llvm_ty(fty))); + } + } + + (alloca, Ty::Struct(name.to_string())) + } + + // ── Statement emission ──────────────────────────────────────────────────── + + fn emit_block(&mut self, block: &Block) { + let saved = self.save_locals(); + for stmt in &block.stmts { + if self.terminated { + break; + } + self.emit_stmt(stmt); + } + self.restore_locals(saved); + } + + fn emit_stmt(&mut self, stmt: &ast::Stmt) { + match &stmt.kind { + // T-Let ───────────────────────────────────────────────────────────── + StmtKind::Let { + name, + ty: ann_ty, + init, + .. 
+ } => { + // Determine the variable's type + let init_hint = ann_ty.as_ref().map(|t| self.resolve_ann_type(t)); + let decl_ty = if let Some(ref h) = init_hint { + h.clone() + } else if let Some(e) = init { + resolve(&self.typeof_expr(e, None), None) + } else { + Ty::I32 + }; + + // Alloca in entry section + let alloca = format!("%{name}.{}", self.tmp); + self.tmp += 1; + self.emit_alloca(&alloca, &decl_ty); + self.push_local(name.clone(), alloca.clone(), decl_ty.clone()); + + // Store initialiser + if let Some(e) = init { + let (v, vty) = self.emit_expr(e, Some(&decl_ty)); + let v = self.coerce(&v, &vty, &decl_ty); + if is_aggregate(&decl_ty) { + // Copy aggregate: load from source alloca, store into dest + let tmp = self.next_tmp(); + self.w(&format!("{tmp} = load {}, ptr {v}", llvm_ty(&decl_ty))); + self.w(&format!("store {} {tmp}, ptr {alloca}", llvm_ty(&decl_ty))); + } else { + self.w(&format!("store {} {v}, ptr {alloca}", llvm_ty(&decl_ty))); + } + } + } + + // T-Return ────────────────────────────────────────────────────────── + StmtKind::Return(expr) => { + let ret_ty = self.ret_ty.clone(); + match expr { + Some(e) => { + let (v, vty) = self.emit_expr(e, Some(&ret_ty)); + let v = self.coerce(&v, &vty, &ret_ty); + self.w(&format!("ret {} {v}", llvm_ty(&ret_ty))); + } + None => { + self.w("ret void"); + } + } + self.terminated = true; + } + + // T-If ────────────────────────────────────────────────────────────── + StmtKind::If { + cond, + then_block, + else_branch, + } => { + let (cv, _) = self.emit_expr(cond, Some(&Ty::Bool)); + let then_lbl = self.next_lbl_pair("then"); + let else_lbl = self.next_lbl_pair("else"); + let merge_lbl = self.next_lbl_pair("merge"); + + let has_else = else_branch.is_some(); + self.emit_cond_br( + &cv, + &then_lbl, + if has_else { &else_lbl } else { &merge_lbl }, + ); + + // then branch + self.emit_label(&then_lbl); + self.emit_block(then_block); + self.emit_br(&merge_lbl); + + // else branch (or fall-through to merge) + if let 
Some(branch) = else_branch { + self.emit_label(&else_lbl); + match branch { + ast::ElseBranch::Block(b) => self.emit_block(b), + ast::ElseBranch::If(s) => self.emit_stmt(s), + } + self.emit_br(&merge_lbl); + } + + self.emit_label(&merge_lbl); + } + + // T-While ─────────────────────────────────────────────────────────── + StmtKind::While { cond, body } => { + let cond_lbl = self.next_lbl_pair("while_cond"); + let body_lbl = self.next_lbl_pair("while_body"); + let after_lbl = self.next_lbl_pair("while_after"); + + self.emit_br(&cond_lbl); + self.emit_label(&cond_lbl); + let (cv, _) = self.emit_expr(cond, Some(&Ty::Bool)); + self.emit_cond_br(&cv, &body_lbl, &after_lbl); + + self.emit_label(&body_lbl); + let old_brk = self.break_lbl.replace(after_lbl.clone()); + let old_cnt = self.continue_lbl.replace(cond_lbl.clone()); + self.emit_block(body); + self.break_lbl = old_brk; + self.continue_lbl = old_cnt; + self.emit_br(&cond_lbl); + + self.emit_label(&after_lbl); + } + + // T-Loop ──────────────────────────────────────────────────────────── + StmtKind::Loop { body } => { + let body_lbl = self.next_lbl_pair("loop_body"); + let after_lbl = self.next_lbl_pair("loop_after"); + + self.emit_br(&body_lbl); + self.emit_label(&body_lbl); + let old_brk = self.break_lbl.replace(after_lbl.clone()); + let old_cnt = self.continue_lbl.replace(body_lbl.clone()); + self.emit_block(body); + self.break_lbl = old_brk; + self.continue_lbl = old_cnt; + self.emit_br(&body_lbl); + + self.emit_label(&after_lbl); + } + + StmtKind::Break => { + let tgt = self.break_lbl.clone().expect("break outside loop"); + self.emit_br(&tgt); + self.terminated = true; + } + + StmtKind::Continue => { + let tgt = self.continue_lbl.clone().expect("continue outside loop"); + self.emit_br(&tgt); + self.terminated = true; + } + + StmtKind::Block(b) => self.emit_block(b), + + StmtKind::Expr(e) => { + self.emit_expr(e, None); + } + + StmtKind::Error => {} + } + } + + // ── Function emission 
───────────────────────────────────────────────────── + + fn emit_function(&mut self, f: &FuncDef) { + let entry = self.phi.get(&f.name).expect("function missing from phi"); + let ret_ty = entry.ret.clone(); + + // Build LLVM param list + let params: Vec = entry + .params + .iter() + .map(|p| format!("{} %p_{}", llvm_ty(&p.ty), p.name)) + .collect(); + + // Reset per-function state + self.alloca_sec = String::new(); + self.body = String::new(); + self.locals = Vec::new(); + self.tmp = 0; + self.lbl = 0; + self.terminated = false; + self.break_lbl = None; + self.continue_lbl = None; + self.ret_ty = ret_ty.clone(); + + // Clone param info to avoid borrow issues + let param_info: Vec<(String, Ty, bool)> = entry + .params + .iter() + .map(|p| (p.name.clone(), p.ty.clone(), p.mutable)) + .collect(); + + // Allocas + stores for parameters + for (name, ty, _) in ¶m_info { + let alloca = format!("%{name}.addr"); + self.emit_alloca(&alloca, ty); + self.w(&format!("store {} %p_{name}, ptr {alloca}", llvm_ty(ty))); + self.push_local(name.clone(), alloca, ty.clone()); + } + + // Emit body + self.emit_block(&f.body); + + // Default return if block didn't terminate + if !self.terminated { + match &ret_ty { + Ty::Unit => self.w("ret void"), + Ty::I32 => self.w("ret i32 0"), + _ => {} // checker guarantees non-unit fns always return + } + } + + // Assemble function text + let ll_ret = if ret_ty == Ty::Unit && f.name == "main" { + "i32".to_string() + } else { + llvm_ty(&ret_ty) + }; + + writeln!( + self.func_defs, + "define {ll_ret} @{}({}) {{", + f.name, + params.join(", ") + ) + .unwrap(); + writeln!(self.func_defs, "entry:").unwrap(); + self.func_defs.push_str(&self.alloca_sec.clone()); + // For main returning void, add i32 return after user ret void + if f.name == "main" && ret_ty == Ty::Unit { + // Replace "ret void" with nothing; we append "ret i32 0" below + // Actually simpler: just add a "ret i32 0" at the end + // The body already has "ret void" if the user wrote return; 
+ // Let's emit the body as-is and then add a fallback ret i32 0 + // at the very end (unreachable if user returned, but valid IR). + self.func_defs.push_str(&self.body.clone()); + writeln!(self.func_defs, " ret i32 0").unwrap(); + } else { + self.func_defs.push_str(&self.body.clone()); + } + writeln!(self.func_defs, "}}").unwrap(); + writeln!(self.func_defs).unwrap(); + } + + // ── Module emission ─────────────────────────────────────────────────────── + + fn emit_module(&mut self, program: &Program) { + // Built-in string_view type + writeln!(self.type_decls, "%string_view = type {{ ptr, i64 }}").unwrap(); + + // User struct types (emit in program order; LLVM allows forward refs) + for def in &program.defs { + if let ast::TopLevelDefKind::Struct(s) = &def.kind { + let fields: Vec = self + .sigma + .fields(&s.name) + .unwrap_or(&[]) + .iter() + .map(|f| llvm_ty(&f.ty)) + .collect(); + writeln!( + self.type_decls, + "%{} = type {{ {} }}", + s.name, + fields.join(", ") + ) + .unwrap(); + } + } + + // Function definitions + for def in &program.defs { + if let ast::TopLevelDefKind::Func(f) = &def.kind { + self.emit_function(f); + } + } + } + + // ── Type annotation resolver ────────────────────────────────────────────── + + fn resolve_ann_type(&self, ty: &ast::Type) -> Ty { + match ty { + ast::Type::U8 => Ty::U8, + ast::Type::U16 => Ty::U16, + ast::Type::U32 => Ty::U32, + ast::Type::U64 => Ty::U64, + ast::Type::I8 => Ty::I8, + ast::Type::I16 => Ty::I16, + ast::Type::I32 => Ty::I32, + ast::Type::I64 => Ty::I64, + ast::Type::F32 => Ty::F32, + ast::Type::F64 => Ty::F64, + ast::Type::Bool => Ty::Bool, + ast::Type::Char => Ty::Char, + ast::Type::Unit => Ty::Unit, + ast::Type::Named(name, _) => Ty::Struct(name.clone()), + ast::Type::Pointer { mutable, pointee } => Ty::Ptr { + mutable: *mutable, + pointee: Box::new(self.resolve_ann_type(pointee)), + }, + ast::Type::OpaquePointer { mutable } => Ty::OpaquePtr { mutable: *mutable }, + ast::Type::Array { elem, size } => { + let 
n = size.parse::().unwrap_or(0); + Ty::Array { + elem: Box::new(self.resolve_ann_type(elem)), + size: n, + } + } + ast::Type::Error => Ty::Error, + } + } +} + +// ── Instruction name helpers ─────────────────────────────────────────────────── + +fn cmp_pred(op: &BinaryOp, ty: &Ty) -> &'static str { + if ty.is_float() { + match op { + BinaryOp::Eq => "fcmp oeq", + BinaryOp::Ne => "fcmp one", + BinaryOp::Lt => "fcmp olt", + BinaryOp::Gt => "fcmp ogt", + BinaryOp::Le => "fcmp ole", + BinaryOp::Ge => "fcmp oge", + _ => "fcmp oeq", + } + } else if ty.is_unsigned() { + match op { + BinaryOp::Eq => "icmp eq", + BinaryOp::Ne => "icmp ne", + BinaryOp::Lt => "icmp ult", + BinaryOp::Gt => "icmp ugt", + BinaryOp::Le => "icmp ule", + BinaryOp::Ge => "icmp uge", + _ => "icmp eq", + } + } else { + // signed / UnboundInt / Bool / Char + match op { + BinaryOp::Eq => "icmp eq", + BinaryOp::Ne => "icmp ne", + BinaryOp::Lt => "icmp slt", + BinaryOp::Gt => "icmp sgt", + BinaryOp::Le => "icmp sle", + BinaryOp::Ge => "icmp sge", + _ => "icmp eq", + } + } +} + +fn arith_instr(op: &BinaryOp, ty: &Ty) -> &'static str { + if ty.is_float() { + match op { + BinaryOp::Add => "fadd", + BinaryOp::Sub => "fsub", + BinaryOp::Mul => "fmul", + BinaryOp::Div => "fdiv", + BinaryOp::Rem => "frem", + _ => "fadd", + } + } else { + let signed = is_signed(ty); + match op { + BinaryOp::Add => "add", + BinaryOp::Sub => "sub", + BinaryOp::Mul => "mul", + BinaryOp::Div => { + if signed { + "sdiv" + } else { + "udiv" + } + } + BinaryOp::Rem => { + if signed { + "srem" + } else { + "urem" + } + } + BinaryOp::BitOr => "or", + BinaryOp::BitXor => "xor", + BinaryOp::BitAnd => "and", + _ => "add", + } + } +} + +fn compound_instr(op: &CompoundAssignOp, ty: &Ty) -> &'static str { + let signed = is_signed(ty); + let float = ty.is_float(); + match op { + CompoundAssignOp::Add => { + if float { + "fadd" + } else { + "add" + } + } + CompoundAssignOp::Sub => { + if float { + "fsub" + } else { + "sub" + } + } + 
CompoundAssignOp::Mul => { + if float { + "fmul" + } else { + "mul" + } + } + CompoundAssignOp::Div => { + if float { + "fdiv" + } else if signed { + "sdiv" + } else { + "udiv" + } + } + CompoundAssignOp::Rem => { + if float { + "frem" + } else if signed { + "srem" + } else { + "urem" + } + } + CompoundAssignOp::BitAnd => "and", + CompoundAssignOp::BitOr => "or", + CompoundAssignOp::BitXor => "xor", + CompoundAssignOp::Shl => "shl", + CompoundAssignOp::Shr => { + if is_signed(ty) { + "ashr" + } else { + "lshr" + } + } + } +} diff --git a/fluxc/src/codegen/mod.rs b/fluxc/src/codegen/mod.rs new file mode 100644 index 0000000..74576f8 --- /dev/null +++ b/fluxc/src/codegen/mod.rs @@ -0,0 +1,116 @@ +pub mod emit; + +use std::path::Path; +use std::process::{Command, Stdio}; +use std::{env, fs}; + +use crate::ast::{self, Parsed}; +use crate::checker::CheckResult; +use crate::cli::Opts; + +// ── Entry point ──────────────────────────────────────────────────────────────── + +/// Compile a parsed + type-checked program to a native binary (or object file +/// when `opts.no_main` is set). +/// +/// Pipeline: +/// 1. Emit LLVM IR text → temp `.ll` file +/// 2. `opt -O2` → optimised `.ll` file (`FLUXC_OPT` overrides `opt`) +/// 3. `llc -filetype=obj` → `.o` file (`FLUXC_LLC` overrides `llc`) +/// 4. 
`cc` link → executable (`FLUXC_CC` overrides `cc`) +/// (step 4 is skipped in `-c` mode) +pub fn compile( + input_path: &str, + program: &ast::Program, + result: CheckResult, + opts: &Opts, +) -> Result<(), String> { + // ── Derive output path ──────────────────────────────────────────────────── + let stem = Path::new(input_path) + .file_stem() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or_else(|| "out".to_string()); + + let final_output = opts.output.clone().unwrap_or_else(|| { + if opts.no_main { + format!("{stem}.o") + } else { + stem.clone() + } + }); + + // ── Temp paths ──────────────────────────────────────────────────────────── + let tmp = env::temp_dir(); + let raw_ll = tmp.join(format!("fluxc_{stem}.ll")); + let opt_ll = tmp.join(format!("fluxc_{stem}.opt.ll")); + let obj = tmp.join(format!("fluxc_{stem}.o")); + + // ── Step 1: emit LLVM IR ────────────────────────────────────────────────── + let ir = emit::emit_program(program, &result.sigma, &result.phi); + fs::write(&raw_ll, &ir).map_err(|e| format!("cannot write IR to {}: {e}", raw_ll.display()))?; + + // ── Step 2: opt ─────────────────────────────────────────────────────────── + let opt_bin = tool_path("FLUXC_OPT", "opt"); + run( + &opt_bin, + &[ + "-O2", + raw_ll.to_str().unwrap(), + "-S", + "-o", + opt_ll.to_str().unwrap(), + ], + )?; + + // ── Step 3: llc ─────────────────────────────────────────────────────────── + let llc_bin = tool_path("FLUXC_LLC", "llc"); + run( + &llc_bin, + &[ + opt_ll.to_str().unwrap(), + "-filetype=obj", + "-o", + obj.to_str().unwrap(), + ], + )?; + + // ── Step 4: link (or copy object as final output) ───────────────────────── + if opts.no_main { + fs::copy(&obj, &final_output).map_err(|e| format!("cannot write {final_output}: {e}"))?; + } else { + let cc_bin = tool_path("FLUXC_CC", "cc"); + run(&cc_bin, &[obj.to_str().unwrap(), "-o", &final_output])?; + } + + // ── Clean up temp files ─────────────────────────────────────────────────── + let _ = 
fs::remove_file(&raw_ll); + let _ = fs::remove_file(&opt_ll); + let _ = fs::remove_file(&obj); + + Ok(()) +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +fn tool_path(env_var: &str, default: &str) -> String { + env::var(env_var).unwrap_or_else(|_| default.to_string()) +} + +fn run(bin: &str, args: &[&str]) -> Result<(), String> { + let output = Command::new(bin) + .args(args) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .map_err(|e| format!("failed to run `{bin}`: {e}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!( + "`{bin}` exited with {}\n{}", + output.status, + stderr.trim_end() + )); + } + Ok(()) +} diff --git a/fluxc/src/main.rs b/fluxc/src/main.rs index 075f00a..e42b16f 100644 --- a/fluxc/src/main.rs +++ b/fluxc/src/main.rs @@ -5,6 +5,7 @@ use crate::parser::Parser; pub mod ast; pub mod checker; pub mod cli; +pub mod codegen; pub mod diagnostics; pub mod lexer; pub mod parser; @@ -14,6 +15,10 @@ fn main() { let opts = cli::parse_args(); let mut had_errors = false; + // Collect (path, source, program, check_result) for all input files. + // We gate codegen on all files being error-free. + let mut compiled = Vec::new(); + for path in &opts.files { let content = fs::read_to_string(path).unwrap_or_else(|e| cli::io_error(path, e)); @@ -26,11 +31,24 @@ fn main() { } if parser.errors.is_empty() { - let sema_errors = checker::check(&program, opts.no_main); - for diag in &sema_errors { + let result = checker::check(&program, opts.no_main); + for diag in &result.errors { eprint!("{}", diag.render(&content, path)); had_errors = true; } + compiled.push((path.clone(), program, result)); + } + } + + if had_errors { + process::exit(1); + } + + // All files are clean — run codegen. 
+ for (path, program, result) in compiled { + if let Err(e) = codegen::compile(&path, &program, result, &opts) { + eprintln!("{}: {e}", "error".to_string()); + had_errors = true; } }