From e89a93aa86e3193462ccbb8a4cf00a1ed3aacde0 Mon Sep 17 00:00:00 2001
From: LunaStev
Date: Sat, 17 Jan 2026 22:54:55 +0900
Subject: [PATCH] feat: implement method calls, recursive structs, and pointer comparisons

This commit introduces several core enhancements to the LLVM backend and the parser, including support for method-style function calls, recursive struct definitions via opaque types, and robust pointer comparisons.

Changes:

- **Method Calls & Structs**:
  - Implemented method-style calls (`obj.method()`) by resolving to `struct_name_method_name` functions and automatically passing `self`.
  - Refactored struct type generation to use **Opaque Structs**, allowing for recursive data structures (e.g., linked lists).
  - Improved `FieldAccess` to use centralized address generation logic.
  - Added implicit type coercion for struct literal fields.

- **Pointer & Type Logic**:
  - Added support for binary comparisons between pointers (`ptr == ptr`) and mixed pointer-integer comparisons (`ptr == 0`).
  - Implemented implicit type coercion for assignments and return statements.
  - Added integer promotion (zero-extension) for values smaller than 32-bit in formatted IO functions.

- **Lexer & Parser Enhancements**:
  - Added a safety check to prevent unescaped newlines in string literals.
  - Updated the type parser to skip newlines, improving formatting flexibility for generic types.
  - Registered `CharLiteral` as a valid expression start.

- **Formatting Engine**:
  - Enhanced the formatting logic to support specifiers within placeholders (e.g., `{x}` for hex, `{c}` for char, `{p}` for pointer).
  - Added automatic C-string (`%s`) mapping for `i8` pointers.

- **Testing & Utils**:
  - Significantly updated `test56.wave` with a robust TCP socket server implementation demonstrating new syscalls, structs, and methods.
  - Added a Python test runner helper for verifying server-side responses.
  - Added `parse_placeholders` to `utils` for advanced format specifier parsing.

This update significantly increases the language's expressiveness, enabling more complex system-level programming patterns. Signed-off-by: LunaStev --- front/lexer/src/lexer/literals.rs | 6 +- front/parser/src/parser/types.rs | 5 +- .../expression/rvalue/assign.rs | 41 ++-- .../expression/rvalue/binary.rs | 69 +++++++ .../llvm_temporary/expression/rvalue/calls.rs | 86 ++++++++- .../expression/rvalue/structs.rs | 51 ++--- .../src/llvm_temporary/llvm_codegen/format.rs | 46 +++-- .../src/llvm_temporary/llvm_codegen/ir.rs | 27 ++- .../src/llvm_temporary/statement/control.rs | 93 ++++----- .../src/llvm_temporary/statement/io.rs | 11 ++ test/test56.wave | 182 ++++++++++++++---- test/test75.wave | 4 +- test/test76.wave | 5 + tools/run_tests.py | 46 +++++ utils/src/formatx.rs | 32 +++ 15 files changed, 541 insertions(+), 163 deletions(-) create mode 100644 test/test76.wave diff --git a/front/lexer/src/lexer/literals.rs b/front/lexer/src/lexer/literals.rs index e969203d..d45349f0 100644 --- a/front/lexer/src/lexer/literals.rs +++ b/front/lexer/src/lexer/literals.rs @@ -2,10 +2,12 @@ use super::Lexer; impl<'a> Lexer<'a> { pub(crate) fn string(&mut self) -> String { - if self.peek() == '"' { self.advance(); } - let mut string_literal = String::new(); while !self.is_at_end() && self.peek() != '"' { + if self.peek() == '\n' { + panic!("Unterminated string (newline in string literal)."); + } + let c = self.advance(); if c == '\\' { diff --git a/front/parser/src/parser/types.rs b/front/parser/src/parser/types.rs index a4ee5965..d977fa37 100644 --- a/front/parser/src/parser/types.rs +++ b/front/parser/src/parser/types.rs @@ -63,6 +63,7 @@ pub fn is_expression_start(token_type: &TokenType) -> bool { | TokenType::Lbrack | TokenType::Asm | TokenType::Deref + | TokenType::CharLiteral(_) ) } @@ -218,7 +219,9 @@ pub fn parse_type_from_stream(tokens: &mut Peekable>) -> Option( env: &mut ExprGenEnv<'ctx, 'a>, @@ -14,6 +15,34 @@ pub(crate) fn gen_assign_operation<'ctx, 'a>( env.context, 
env.builder, target, env.variables, env.module, env.struct_types, env.struct_field_indices ); + let element_type = match ptr.get_type().get_element_type() { + AnyTypeEnum::IntType(t) => BasicTypeEnum::IntType(t), + AnyTypeEnum::FloatType(t) => BasicTypeEnum::FloatType(t), + AnyTypeEnum::PointerType(t) => BasicTypeEnum::PointerType(t), + AnyTypeEnum::ArrayType(t) => BasicTypeEnum::ArrayType(t), + AnyTypeEnum::StructType(t) => BasicTypeEnum::StructType(t), + AnyTypeEnum::VectorType(t) => BasicTypeEnum::VectorType(t), + _ => panic!("Unsupported LLVM element type"), + }; + + if matches!(operator, AssignOperator::Assign) { + let mut rhs = env.gen(value, Some(element_type)); + + if rhs.get_type() != element_type { + rhs = coerce_basic_value( + env.context, + env.builder, + rhs, + element_type, + "assign_cast", + CoercionMode::Implicit, + ); + } + + env.builder.build_store(ptr, rhs).unwrap(); + return rhs; + } + let current_val = env.builder.build_load(ptr, "load_current").unwrap(); let new_val = env.gen(value, Some(current_val.get_type())); @@ -55,17 +84,7 @@ pub(crate) fn gen_assign_operation<'ctx, 'a>( AssignOperator::RemAssign => env.builder.build_float_rem(lhs, rhs, "rem_assign").unwrap().as_basic_value_enum(), }, - _ => panic!("Type mismatch or unsupported type in AssignOperation"), - }; - - let element_type = match ptr.get_type().get_element_type() { - AnyTypeEnum::IntType(t) => BasicTypeEnum::IntType(t), - AnyTypeEnum::FloatType(t) => BasicTypeEnum::FloatType(t), - AnyTypeEnum::PointerType(t) => BasicTypeEnum::PointerType(t), - AnyTypeEnum::ArrayType(t) => BasicTypeEnum::ArrayType(t), - AnyTypeEnum::StructType(t) => BasicTypeEnum::StructType(t), - AnyTypeEnum::VectorType(t) => BasicTypeEnum::VectorType(t), - _ => panic!("Unsupported LLVM element type"), + _ => panic!("AssignOperation (+=, -=, ...) 
only supports numeric types"), }; let result_casted = match (result, element_type) { diff --git a/llvm_temporary/src/llvm_temporary/expression/rvalue/binary.rs b/llvm_temporary/src/llvm_temporary/expression/rvalue/binary.rs index 6f7f0106..cfefcaa7 100644 --- a/llvm_temporary/src/llvm_temporary/expression/rvalue/binary.rs +++ b/llvm_temporary/src/llvm_temporary/expression/rvalue/binary.rs @@ -144,6 +144,75 @@ pub(crate) fn gen<'ctx, 'a>( _ => panic!("Unsupported mixed-type operator (float + int)"), } } + (BasicValueEnum::PointerValue(lp), BasicValueEnum::PointerValue(rp)) => { + let i64_ty = env.context.i64_type(); + let li = env.builder.build_ptr_to_int(lp, i64_ty, "l_ptr2int").unwrap(); + let ri = env.builder.build_ptr_to_int(rp, i64_ty, "r_ptr2int").unwrap(); + + let mut result = match operator { + Operator::Equal => env.builder.build_int_compare(IntPredicate::EQ, li, ri, "ptreq").unwrap(), + Operator::NotEqual => env.builder.build_int_compare(IntPredicate::NE, li, ri, "ptrne").unwrap(), + _ => panic!("Unsupported pointer operator: {:?}", operator), + }; + + if let Some(inkwell::types::BasicTypeEnum::IntType(target_ty)) = expected_type { + if result.get_type() != target_ty { + result = env.builder.build_int_cast(result, target_ty, "cast_result").unwrap(); + } + } + + return result.as_basic_value_enum(); + } + + (BasicValueEnum::PointerValue(lp), BasicValueEnum::IntValue(ri)) => { + let i64_ty = env.context.i64_type(); + let li = env.builder.build_ptr_to_int(lp, i64_ty, "l_ptr2int").unwrap(); + + let ri = if ri.get_type().get_bit_width() == 64 { + ri + } else { + env.builder.build_int_cast(ri, i64_ty, "r_i64").unwrap() + }; + + let mut result = match operator { + Operator::Equal => env.builder.build_int_compare(IntPredicate::EQ, li, ri, "ptreq0").unwrap(), + Operator::NotEqual => env.builder.build_int_compare(IntPredicate::NE, li, ri, "ptrne0").unwrap(), + _ => panic!("Unsupported ptr/int operator: {:?}", operator), + }; + + if let 
Some(inkwell::types::BasicTypeEnum::IntType(target_ty)) = expected_type { + if result.get_type() != target_ty { + result = env.builder.build_int_cast(result, target_ty, "cast_result").unwrap(); + } + } + + return result.as_basic_value_enum(); + } + + (BasicValueEnum::IntValue(li), BasicValueEnum::PointerValue(rp)) => { + let i64_ty = env.context.i64_type(); + let li = if li.get_type().get_bit_width() == 64 { + li + } else { + env.builder.build_int_cast(li, i64_ty, "l_i64").unwrap() + }; + + let ri = env.builder.build_ptr_to_int(rp, i64_ty, "r_ptr2int").unwrap(); + + let mut result = match operator { + Operator::Equal => env.builder.build_int_compare(IntPredicate::EQ, li, ri, "ptreq0").unwrap(), + Operator::NotEqual => env.builder.build_int_compare(IntPredicate::NE, li, ri, "ptrne0").unwrap(), + _ => panic!("Unsupported int/ptr operator: {:?}", operator), + }; + + if let Some(inkwell::types::BasicTypeEnum::IntType(target_ty)) = expected_type { + if result.get_type() != target_ty { + result = env.builder.build_int_cast(result, target_ty, "cast_result").unwrap(); + } + } + + return result.as_basic_value_enum(); + } _ => panic!("Type mismatch in binary expression"), } diff --git a/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs b/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs index ac67e02e..7b60f709 100644 --- a/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs +++ b/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs @@ -1,9 +1,32 @@ -use inkwell::types::BasicTypeEnum; +use inkwell::types::{AsTypeRef, BasicTypeEnum}; use super::ExprGenEnv; use inkwell::values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}; use parser::ast::{Expression, WaveType}; use crate::llvm_temporary::statement::variable::{coerce_basic_value, CoercionMode}; +fn normalize_struct_name(raw: &str) -> &str { + raw.strip_prefix("struct.").unwrap_or(raw).trim_start_matches('%') +} + +fn resolve_struct_key<'ctx>( + st: 
inkwell::types::StructType<'ctx>, + struct_types: &std::collections::HashMap>, +) -> String { + if let Some(raw) = st.get_name().and_then(|n| n.to_str().ok()) { + return normalize_struct_name(raw).to_string(); + } + + let st_ref = st.as_type_ref(); + for (name, ty) in struct_types { + if ty.as_type_ref() == st_ref { + return name.clone(); + } + } + + panic!("LLVM struct type has no name and cannot be matched to struct_types"); +} + + pub(crate) fn gen_method_call<'ctx, 'a>( env: &mut ExprGenEnv<'ctx, 'a>, object: &Expression, @@ -59,6 +82,67 @@ pub(crate) fn gen_method_call<'ctx, 'a>( } } + { + let obj_preview = env.gen(object, None); + let obj_ty = obj_preview.get_type(); + + let struct_name_opt: Option = match obj_ty { + BasicTypeEnum::StructType(st) => Some(resolve_struct_key(st, env.struct_types)), + BasicTypeEnum::PointerType(p) if p.get_element_type().is_struct_type() => { + Some(resolve_struct_key(p.get_element_type().into_struct_type(), env.struct_types)) + } + _ => None, + }; + + if let Some(struct_name) = struct_name_opt { + let fn_name = format!("{}_{}", struct_name, name); + + if let Some(function) = env.module.get_function(&fn_name) { + let fn_type = function.get_type(); + let param_types = fn_type.get_param_types(); + let expected_self = param_types.get(0).cloned(); + + let mut obj_val = obj_preview; + if let Some(et) = expected_self { + obj_val = coerce_basic_value( + env.context, + env.builder, + obj_val, + et, + "self_cast", + CoercionMode::Implicit, + ); + } + + let mut call_args: Vec = Vec::new(); + call_args.push(obj_val.into()); + + for (i, arg_expr) in args.iter().enumerate() { + let expected_ty = param_types.get(i + 1).cloned(); + let mut arg_val = env.gen(arg_expr, expected_ty); + if let Some(et) = expected_ty { + arg_val = coerce_basic_value( + env.context, env.builder, arg_val, et, &format!("arg{}_cast", i), + CoercionMode::Implicit + ); + } + call_args.push(arg_val.into()); + } + + let call_site = env + .builder + .build_call(function, 
&call_args, &format!("call_{}", fn_name)) + .unwrap(); + + if function.get_type().get_return_type().is_some() { + return call_site.try_as_basic_value().left().unwrap(); + } else { + return env.context.i32_type().const_zero().as_basic_value_enum(); + } + } + } + } + // method-style call: fn(self, ...) let function = env .module diff --git a/llvm_temporary/src/llvm_temporary/expression/rvalue/structs.rs b/llvm_temporary/src/llvm_temporary/expression/rvalue/structs.rs index d0439f39..2270c681 100644 --- a/llvm_temporary/src/llvm_temporary/expression/rvalue/structs.rs +++ b/llvm_temporary/src/llvm_temporary/expression/rvalue/structs.rs @@ -1,6 +1,7 @@ use super::ExprGenEnv; use inkwell::values::{BasicValue, BasicValueEnum}; use parser::ast::{Expression, WaveType}; +use crate::llvm_temporary::llvm_codegen::generate_address_ir; pub(crate) fn gen_struct_literal<'ctx, 'a>( env: &mut ExprGenEnv<'ctx, 'a>, @@ -27,7 +28,11 @@ pub(crate) fn gen_struct_literal<'ctx, 'a>( .get(field_name) .unwrap_or_else(|| panic!("Field '{}' not found in struct '{}'", field_name, name)); - let field_val = env.gen(field_expr, None); + let expected_field_ty = struct_ty + .get_field_type_at_index(*idx) + .unwrap_or_else(|| panic!("No field type at index {} for struct '{}'", idx, name)); + + let field_val = env.gen(field_expr, Some(expected_field_ty)); let field_ptr = env .builder @@ -48,40 +53,24 @@ pub(crate) fn gen_field_access<'ctx, 'a>( object: &Expression, field: &str, ) -> BasicValueEnum<'ctx> { - let var_name = match object { - Expression::Variable(name) => name, - other => panic!("FieldAccess on non-variable object not supported yet: {:?}", other), - }; - - let var_info = env - .variables - .get(var_name) - .unwrap_or_else(|| panic!("Variable '{}' not found for field access", var_name)); - - let struct_name = match &var_info.ty { - WaveType::Struct(name) => name, - other_ty => panic!( - "Field access on non-struct type {:?} for variable '{}'", - other_ty, var_name - ), + let full = 
Expression::FieldAccess { + object: Box::new(object.clone()), + field: field.to_string(), }; - let field_indices = env - .struct_field_indices - .get(struct_name) - .unwrap_or_else(|| panic!("Field index map for struct '{}' not found", struct_name)); - - let idx = field_indices - .get(field) - .unwrap_or_else(|| panic!("Field '{}' not found in struct '{}'", field, struct_name)); - - let field_ptr = env - .builder - .build_struct_gep(var_info.ptr, *idx, &format!("{}.{}", var_name, field)) - .unwrap(); + let ptr = generate_address_ir( + env.context, + env.builder, + &full, + env.variables, + env.module, + env.struct_types, + env.struct_field_indices, + ); env.builder - .build_load(field_ptr, &format!("load_{}_{}", var_name, field)) + .build_load(ptr, &format!("load_field_{}", field)) .unwrap() .as_basic_value_enum() } + diff --git a/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs b/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs index b6577140..0fdff26e 100644 --- a/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs +++ b/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs @@ -12,14 +12,21 @@ pub fn wave_format_to_c<'ctx>( while let Some(c) = chars.next() { if c == '{' { - if let Some('}') = chars.peek() { + let mut spec = String::new(); + while let Some(&p) = chars.peek() { chars.next(); // consume '}' + if p == '}' { break; } + spec.push(p); + } - let ty = arg_types - .get(arg_index) - .unwrap_or_else(|| panic!("Missing argument for format")); + let spec = spec.trim(); - let fmt = match ty { + let ty = arg_types + .get(arg_index) + .unwrap_or_else(|| panic!("Missing argument for format")); + + let fmt = if spec.is_empty() { + match ty { BasicTypeEnum::IntType(int_ty) => { let bits = int_ty.get_bit_width(); match bits { @@ -38,14 +45,29 @@ pub fn wave_format_to_c<'ctx>( "%lf" } } - BasicTypeEnum::PointerType(_) => "%p", + BasicTypeEnum::PointerType(ptr_ty) => { + let elem = ptr_ty.get_element_type(); + if elem.is_int_type() && 
elem.into_int_type().get_bit_width() == 8 { + "%s" // i8* => C string + } else { + "%p" + } + } _ => panic!("Unsupported type in format"), - }; - - result.push_str(fmt); - arg_index += 1; - continue; - } + } + } else { + match spec { + "c" => "%c", + "x" => "%x", + "p" => "%p", + "s" => "%s", + "d" => "%d", + _ => panic!("Unknown format spec: {{{}}}", spec), + } + }; + result.push_str(fmt); + arg_index += 1; + continue; } result.push(c); diff --git a/llvm_temporary/src/llvm_temporary/llvm_codegen/ir.rs b/llvm_temporary/src/llvm_temporary/llvm_codegen/ir.rs index dc2a5f46..f073a917 100644 --- a/llvm_temporary/src/llvm_temporary/llvm_codegen/ir.rs +++ b/llvm_temporary/src/llvm_temporary/llvm_codegen/ir.rs @@ -35,7 +35,6 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode]) -> String { let pass_manager: PassManager = PassManager::create(()); pass_manager_builder.populate_module_pass_manager(&pass_manager); - let struct_types: HashMap = HashMap::new(); let mut struct_field_indices: HashMap> = HashMap::new(); let mut global_consts: HashMap = HashMap::new(); @@ -59,15 +58,11 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode]) -> String { } let mut struct_types: HashMap = HashMap::new(); + for ast in ast_nodes { if let ASTNode::Struct(struct_node) = ast { - let field_types: Vec = struct_node - .fields - .iter() - .map(|(_, ty)| wave_type_to_llvm_type(context, ty, &struct_types)) - .collect(); - let struct_ty = context.struct_type(&field_types, false); - struct_types.insert(struct_node.name.clone(), struct_ty); + let st = context.opaque_struct_type(&struct_node.name); + struct_types.insert(struct_node.name.clone(), st); let mut index_map = HashMap::new(); for (i, (field_name, _)) in struct_node.fields.iter().enumerate() { @@ -77,6 +72,22 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode]) -> String { } } + for ast in ast_nodes { + if let ASTNode::Struct(struct_node) = ast { + let st = *struct_types + .get(&struct_node.name) + .unwrap_or_else(|| panic!("Opaque struct missing: 
{}", struct_node.name)); + + let field_types: Vec = struct_node + .fields + .iter() + .map(|(_, ty)| wave_type_to_llvm_type(context, ty, &struct_types)) + .collect(); + + st.set_body(&field_types, false); + } + } + let mut proto_functions: Vec<(String, FunctionNode)> = Vec::new(); for ast in ast_nodes { if let ASTNode::ProtoImpl(proto_impl) = ast { diff --git a/llvm_temporary/src/llvm_temporary/statement/control.rs b/llvm_temporary/src/llvm_temporary/statement/control.rs index 08c03cf3..f8e85971 100644 --- a/llvm_temporary/src/llvm_temporary/statement/control.rs +++ b/llvm_temporary/src/llvm_temporary/statement/control.rs @@ -7,6 +7,7 @@ use inkwell::values::{BasicValue, BasicValueEnum, FunctionValue}; use inkwell::{FloatPredicate, IntPredicate}; use parser::ast::{ASTNode, Expression}; use std::collections::HashMap; +use crate::llvm_temporary::statement::variable::{coerce_basic_value, CoercionMode}; fn truthy_to_i1<'ctx>( context: &'ctx inkwell::context::Context, @@ -318,62 +319,46 @@ pub(super) fn gen_return_ir<'ctx>( struct_types: &HashMap>, struct_field_indices: &HashMap>, ) { - if let Some(expr) = expr_opt { - let ret_type = current_function - .get_type() - .get_return_type() - .expect("Function should have a return type"); - let expected_type: BasicTypeEnum<'ctx> = ret_type - .try_into() - .expect("Failed to convert return type to BasicTypeEnum"); - - let value = generate_expression_ir( - context, - builder, - expr, - variables, - module, - Some(expected_type), - global_consts, - struct_types, - struct_field_indices, - ); + let expected_ret = current_function.get_type().get_return_type(); // Option - let value = match value { - BasicValueEnum::PointerValue(ptr) => { - let ty = ptr.get_type().get_element_type(); - match ty { - AnyTypeEnum::PointerType(_) => builder - .build_load(ptr, "load_ret") - .unwrap() - .as_basic_value_enum(), - _ => ptr.as_basic_value_enum(), - } - } - other => other, - }; + match (expected_ret, expr_opt) { + (None, None) => { + 
builder.build_return(None).unwrap(); + } - let casted_value = match (value, expected_type) { - (BasicValueEnum::PointerValue(ptr), BasicTypeEnum::IntType(_)) => builder - .build_ptr_to_int(ptr, expected_type.into_int_type(), "ptr_to_int") - .unwrap() - .as_basic_value_enum(), - (BasicValueEnum::PointerValue(ptr), BasicTypeEnum::PointerType(_)) => { - ptr.as_basic_value_enum() + (None, Some(_)) => { + panic!("Void function cannot return a value"); + } + + (Some(_), None) => { + panic!("Non-void function must return a value"); + } + + (Some(ret_ty), Some(expr)) => { + let mut v = generate_expression_ir( + context, + builder, + expr, + variables, + module, + Some(ret_ty), + global_consts, + struct_types, + struct_field_indices, + ); + + if v.get_type() != ret_ty { + v = coerce_basic_value( + context, + builder, + v, + ret_ty, + "ret_cast", + CoercionMode::Implicit, + ); } - (BasicValueEnum::FloatValue(v), BasicTypeEnum::IntType(t)) => builder - .build_float_to_signed_int(v, t, "float_to_int") - .unwrap() - .as_basic_value_enum(), - (BasicValueEnum::IntValue(v), BasicTypeEnum::FloatType(t)) => builder - .build_signed_int_to_float(v, t, "int_to_float") - .unwrap() - .as_basic_value_enum(), - _ => value, - }; - builder.build_return(Some(&casted_value)).unwrap(); - } else { - builder.build_return(None).unwrap(); + builder.build_return(Some(&v)).unwrap(); + } } -} +} \ No newline at end of file diff --git a/llvm_temporary/src/llvm_temporary/statement/io.rs b/llvm_temporary/src/llvm_temporary/statement/io.rs index 3d1d0eac..9bd3b47e 100644 --- a/llvm_temporary/src/llvm_temporary/statement/io.rs +++ b/llvm_temporary/src/llvm_temporary/statement/io.rs @@ -142,6 +142,17 @@ pub(super) fn gen_print_format_ir<'ctx>( for value in arg_vals { let casted_value = match value { + BasicValueEnum::IntValue(iv) => { + let bw = iv.get_type().get_bit_width(); + if bw < 32 { + builder + .build_int_z_extend(iv, context.i32_type(), "int_promote") + .unwrap() + .as_basic_value_enum() + } else { 
+ value + } + } BasicValueEnum::PointerValue(ptr_val) => { let element_ty = ptr_val.get_type().get_element_type(); if element_ty.is_int_type() && element_ty.into_int_type().get_bit_width() == 8 { diff --git a/test/test56.wave b/test/test56.wave index 0f3effa7..73a2f062 100644 --- a/test/test56.wave +++ b/test/test56.wave @@ -1,37 +1,50 @@ -fun syscall1(id: i64) -> i64 { - var ret_val: i64; +// ========================= +// Linux x86_64 syscalls +// args: rax, rdi, rsi, rdx, r10, r8, r9 +// ========================= + +fun len(s: str) -> i32 { + let mut i: i32 = 0; + while (s[i] != 0) { + i += 1; + } + return i; +} + +fun syscall0(id: i64) -> i64 { + var ret: i64; asm { "syscall" in("rax") id - out("rax") ret_val + out("rax") ret } - return ret_val; + return ret; } -fun syscall2(id: i64, arg1: i64) -> i64 { - var ret_val: i64; +fun syscall1(id: i64, a1: i64) -> i64 { + var ret: i64; asm { "syscall" in("rax") id - in("rdi") arg1 - out("rax") ret_val + in("rdi") a1 + out("rax") ret } - return ret_val; + return ret; } -fun syscall3(id: i64, arg1: i64, arg2: i64) -> i64 { - var ret_val: i64; +fun syscall2(id: i64, a1: i64, a2: i64) -> i64 { + var ret: i64; asm { "syscall" in("rax") id - in("rdi") arg1 - in("rsi") arg2 - out("rax") ret_val + in("rdi") a1 + in("rsi") a2 + out("rax") ret } - return ret_val; + return ret; } -fun syscall4i(id: i64, a1: i64, a2: i64, a3: i64) -> i64 { +fun syscall3(id: i64, a1: i64, a2: i64, a3: i64) -> i64 { var ret: i64; asm { "syscall" @@ -44,7 +57,20 @@ fun syscall4i(id: i64, a1: i64, a2: i64, a3: i64) -> i64 { return ret; } -fun syscall4p(id: i64, a1: i64, a2: ptr, a3: i64) -> i64 { +fun syscall3pi(id: i64, a1: i64, p2: ptr, a3: i64) -> i64 { + var ret: i64; + asm { + "syscall" + in("rax") id + in("rdi") a1 + in("rsi") p2 + in("rdx") a3 + out("rax") ret + } + return ret; +} + +fun syscall4(id: i64, a1: i64, a2: i64, a3: i64, a4: i64) -> i64 { var ret: i64; asm { "syscall" @@ -52,37 +78,112 @@ fun syscall4p(id: i64, a1: i64, a2: ptr, a3: 
i64) -> i64 { in("rdi") a1 in("rsi") a2 in("rdx") a3 + in("r10") a4 out("rax") ret } return ret; } -fun _socket_create() -> i32 { - return syscall3(41, 2, 1); +fun syscall5(id: i64, a1: i64, a2: i64, a3: i64, p4: ptr, a5: i64) -> i64 { + var ret: i64; + asm { + "syscall" + in("rax") id + in("rdi") a1 + in("rsi") a2 + in("rdx") a3 + in("r10") p4 + in("r8") a5 + out("rax") ret + } + return ret; +} + + +// ========================= +// socket helpers +// ========================= + +fun htons(x: i16) -> i16 { + var a: i32 = x; + var y: i32 = ((a & 255) << 8) | ((a >> 8) & 255); + return y; +} + +fun _setsockopt_reuseaddr(sockfd: i64) { + // SOL_SOCKET = 1, SO_REUSEADDR = 2 + var one: i32 = 1; + syscall5(54, sockfd, 1, 2, &one, 4); +} + +struct SockAddrIn { + sin_family: i16; // AF_INET = 2 + sin_port: i16; // network byte order + sin_addr: i32; // INADDR_ANY = 0 + sin_zero: array; } -fun _socket_bind(sockfd: i32, ip_addr: i32, port: i16) -> i32 { - var result: i64; +// bind needs sockaddr* typed pointer. +// If your compiler cannot pass ptr into ptr, +// keep this specialized syscall for sockaddr. 
+fun syscall3p_sockaddr(id: i64, a1: i64, p2: ptr, a3: i64) -> i64 { + var ret: i64; asm { - out("rax") result - in("rdi") sockfd + "syscall" + in("rax") id + in("rdi") a1 + in("rsi") p2 + in("rdx") a3 + out("rax") ret } - return result; + return ret; +} + +fun _socket_create_tcp() -> i64 { + return syscall3(41, 2, 1, 0); +} + +fun _socket_bind_any(sockfd: i64, port: i16) -> i64 { + let mut addr: SockAddrIn = SockAddrIn { + sin_family: 2, + sin_port: htons(port), + sin_addr: 0, + sin_zero: [0,0,0,0,0,0,0,0] + }; + + return syscall3p_sockaddr(49, sockfd, &addr, 16); +} + +fun _socket_listen(sockfd: i64, backlog: i64) -> i64 { + return syscall2(50, sockfd, backlog); +} + +fun _socket_accept(sockfd: i64) -> i64 { + return syscall3(43, sockfd, 0, 0); } -fun _socket_listen(sockfd: i32, backlog: i32) -> i32 { - return syscall3(50, sockfd, backlog); +fun _socket_close(fd: i64) { + syscall1(3, fd); } -fun _socket_close(sockfd: i32) { - syscall2(3, sockfd); +fun _write(fd: i64, buf: str, len: i64) -> i64 { + // If your compiler represents `str` as i8*, this will work with syscall3pi. 
+ return syscall3pi(1, fd, buf, len); } -fun new_server(ip_str: str, port: i16) -> i32 { - var sockfd: i32 = _socket_create(); +// ========================= +// server logic +// ========================= + +fun new_server(port: i16) -> i64 { + var sockfd: i64 = _socket_create_tcp(); + _setsockopt_reuseaddr(sockfd); if (sockfd < 0) { return -1; } - if (_socket_bind(sockfd, 0, port) < 0) { + var b: i64 = _socket_bind_any(sockfd, port); + println("bind ret = {}", b); + + if (b < 0) { _socket_close(sockfd); return -1; } @@ -90,7 +191,7 @@ fun new_server(ip_str: str, port: i16) -> i32 { return sockfd; } -fun listen(server_fd: i32, backlog: i32) -> i32 { +fun listen(server_fd: i64, backlog: i64) -> i64 { if (_socket_listen(server_fd, backlog) < 0) { println("Error: Failed to listen on socket."); return -1; @@ -99,38 +200,37 @@ fun listen(server_fd: i32, backlog: i32) -> i32 { return server_fd; } -fun start(server_fd: i32) { +fun start(server_fd: i64) { println("Server accepting connections..."); while (true) { - var client_fd: i32 = syscall3(43, server_fd, 0); + var client_fd: i64 = _socket_accept(server_fd); if (client_fd < 0) { - println("Error: Failed to accept connection."); + println("accept failed: {}", client_fd); continue; } println("Client connected! 
fd: {}", client_fd); - var response: str = "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nWelcome to the Wave HTTP Server!"; + var response: str = "HTTP/1.1 200 OK\r\nContent-Type: text/plain; charset=utf-8\r\nContent-Length: 33\r\nConnection: close\r\n\r\nWelcome to the Wave HTTP Server!"; + - syscall4p(1, client_fd, response, 82); + _write(client_fd, response, len(response)); _socket_close(client_fd); println("Client disconnected."); } } - fun main() { println("--- Wave HTTP Server Application ---"); - var server_instance: i32 = new_server("0.0.0.0", 8080) - .listen(10); + var server_instance: i64 = new_server(8080).listen(10); if (server_instance >= 0) { server_instance.start(); } else { println("Server failed to start."); } -} \ No newline at end of file +} diff --git a/test/test75.wave b/test/test75.wave index 20586b1b..ea8ed473 100644 --- a/test/test75.wave +++ b/test/test75.wave @@ -17,8 +17,8 @@ fun string_bytes(s: str) -> Bytes { return b; } -fun count_placeholders(input: str) -> i32 { - var bytes: Bytes = string_bytes(input); +fun count_placeholders(text: str) -> i32 { + var bytes: Bytes = string_bytes(text); var i: i32 = 0; var count: i32 = 0; diff --git a/test/test76.wave b/test/test76.wave new file mode 100644 index 00000000..d60ee1eb --- /dev/null +++ b/test/test76.wave @@ -0,0 +1,5 @@ +fun main() { + println(""); + println("a"); + println("a\nb"); +} diff --git a/tools/run_tests.py b/tools/run_tests.py index 0f29b20d..646ac70e 100644 --- a/tools/run_tests.py +++ b/tools/run_tests.py @@ -48,6 +48,49 @@ def send_udp_for_test61(): sock.sendto(b"hello from python\n", ("127.0.0.1", 8080)) sock.close() +def run_test56_server(cmd): + print(f"{BLUE}RUN test56.wave (server test){RESET}") + + proc = subprocess.Popen( + cmd, + cwd=str(ROOT), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + try: + time.sleep(1.0) # server boot wait + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(2) + s.connect(("127.0.0.1", 
8080)) + s.sendall(b"GET / HTTP/1.1\r\nHost: localhost\r\n\r\n") + + data = s.recv(4096) + s.close() + + if b"Welcome to the Wave HTTP Server!" in data: + print(f"{GREEN}→ PASS (server responded){RESET}\n") + return 1 + else: + print(f"{RED}→ FAIL (unexpected response){RESET}") + print(data) + return 0 + + except Exception as e: + print(f"{RED}→ FAIL (server not responding){RESET}") + print(e) + return 0 + + finally: + proc.terminate() + try: + proc.wait(timeout=1) + except subprocess.TimeoutExpired: + proc.kill() + + def looks_like_fail(stderr: str) -> bool: if not stderr: return False @@ -76,6 +119,9 @@ def run_and_classify(name, cmd): if name == "test74.wave": stdin_data = "10\n" + if name == "test56.wave": + return run_test56_server(cmd) + try: if name == "test61.wave": threading.Thread( diff --git a/utils/src/formatx.rs b/utils/src/formatx.rs index f51c7c62..a7c25536 100644 --- a/utils/src/formatx.rs +++ b/utils/src/formatx.rs @@ -4,6 +4,38 @@ // This module replaces regex usage for placeholder detection. // Supported pattern: `{ ... }` (non-nested, no escape) +#[derive(Debug, Clone)] +pub struct Placeholder { + pub spec: String, +} + +// "{c}" -> spec="c", "{}" -> spec="" +pub fn parse_placeholders(input: &str) -> Vec { + let bytes = input.as_bytes(); + let mut i = 0; + let mut out = Vec::new(); + + while i < bytes.len() { + if bytes[i] == b'{' { + i += 1; + let start = i; + while i < bytes.len() && bytes[i] != b'}' { + i += 1; + } + if i >= bytes.len() { break; } + + let spec = input[start..i].trim().to_string(); + out.push(Placeholder { spec }); + + i += 1; // consume '}' + } else { + i += 1; + } + } + + out +} + /// Count `{...}` placeholders in the given string. /// /// Equivalent to the regex pattern: `\{[^}]*\}`