diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs deleted file mode 100644 index b353bea..0000000 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ /dev/null @@ -1,5459 +0,0 @@ -//! Code generation for instructions -//! -//! This module handles the conversion from statements to compiled instructions -//! and generates LLVM IR for individual instructions. - -use super::context::{CodeGenError, EbpfContext, Result, RuntimeAddress}; -use crate::script::{PrintStatement, Program, Statement}; -use aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user; -use ghostscope_protocol::trace_event::{ - BacktraceData, EndInstructionData, InstructionHeader, PrintComplexFormatData, - PrintComplexVariableData, PrintStringIndexData, PrintVariableIndexData, VariableStatus, -}; -use ghostscope_protocol::{InstructionType, TraceContext, TypeKind}; -use inkwell::values::{BasicValueEnum, IntValue}; -use inkwell::AddressSpace; -use std::collections::HashMap; -use tracing::{debug, info, warn}; - -/// Parameters for generating a PrintComplexVariable with runtime read -#[derive(Debug, Clone)] -struct PrintVarRuntimeMeta { - var_name_index: u16, - type_index: u16, - access_path: String, - data_len_limit: usize, -} - -/// Source for complex formatted argument data -#[derive(Debug, Clone)] -enum ComplexArgSource<'ctx> { - RuntimeRead { - address: ghostscope_dwarf::PlannedAddress, - dwarf_type: ghostscope_dwarf::TypeInfo, - module_for_offsets: Option, - }, - /// Memory dump from a pointer/byte address with a static length - MemDump { - address: RuntimeAddress<'ctx>, - len: usize, - }, - /// Memory dump with dynamic runtime length; bytes read up to min(len_value, max_len) - MemDumpDynamic { - address: RuntimeAddress<'ctx>, - len_value: inkwell::values::IntValue<'ctx>, - max_len: usize, - }, - ImmediateBytes { - bytes: Vec, - }, - AddressValue { - address: ghostscope_dwarf::PlannedAddress, - module_for_offsets: Option, - }, - // Newly added: a 
value computed in LLVM at runtime (e.g., expression result) - ComputedInt { - value: inkwell::values::IntValue<'ctx>, - byte_len: usize, // typically 8 - }, -} - -/// Argument descriptor for PrintComplexFormat -#[derive(Debug, Clone)] -struct ComplexArg<'ctx> { - var_name_index: u16, - type_index: u16, - access_path: Vec, - data_len: usize, - source: ComplexArgSource<'ctx>, -} - -const DYNAMIC_READ_ERROR_PAYLOAD_LEN: usize = 12; - -fn print_complex_format_instruction_budget( - max_trace_event_size: usize, - bytes_reserved_so_far: usize, -) -> usize { - let end_instruction_size = - std::mem::size_of::() + std::mem::size_of::(); - let event_budget = max_trace_event_size - .saturating_sub(bytes_reserved_so_far) - .saturating_sub(end_instruction_size); - let instruction_budget_cap = std::mem::size_of::() + u16::MAX as usize; - event_budget.min(instruction_budget_cap) -} - -fn distribute_budget_fairly(caps: &[usize], budget: usize) -> Vec { - let mut allocations = vec![0; caps.len()]; - let mut active: Vec = caps - .iter() - .enumerate() - .filter_map(|(idx, cap)| (*cap > 0).then_some(idx)) - .collect(); - let mut remaining = budget; - - while remaining > 0 && !active.is_empty() { - let share = remaining / active.len(); - if share == 0 { - for &idx in active.iter().take(remaining) { - allocations[idx] += 1; - } - break; - } - - let mut consumed = 0usize; - let mut next_active = Vec::with_capacity(active.len()); - for idx in active { - let cap_left = caps[idx].saturating_sub(allocations[idx]); - let take = share.min(cap_left); - allocations[idx] += take; - consumed += take; - if allocations[idx] < caps[idx] { - next_active.push(idx); - } - } - - if consumed == 0 { - break; - } - - remaining = remaining.saturating_sub(consumed); - active = next_active; - } - - allocations -} - -fn allocate_dynamic_payload_reservations(max_lens: &[usize], available: usize) -> Vec { - if max_lens.is_empty() || available == 0 { - return vec![0; max_lens.len()]; - } - - let base_caps = 
vec![DYNAMIC_READ_ERROR_PAYLOAD_LEN; max_lens.len()]; - let base_budget = available.min(DYNAMIC_READ_ERROR_PAYLOAD_LEN.saturating_mul(max_lens.len())); - let mut reservations = distribute_budget_fairly(&base_caps, base_budget); - let remaining_budget = available.saturating_sub(reservations.iter().sum::()); - if remaining_budget == 0 { - return reservations; - } - - let extra_caps: Vec = max_lens - .iter() - .zip(reservations.iter()) - .map(|(max_len, reserved)| { - max_len - .max(&DYNAMIC_READ_ERROR_PAYLOAD_LEN) - .saturating_sub(*reserved) - }) - .collect(); - let extras = distribute_budget_fairly(&extra_caps, remaining_budget); - for (reservation, extra) in reservations.iter_mut().zip(extras) { - *reservation += extra; - } - - reservations -} - -impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { - const UNKNOWN_CHAR_ARRAY_READ_FALLBACK: usize = 256; - - fn build_errno_i32(&self, ret: IntValue<'ctx>, name: &str) -> Result> { - let i32_ty = self.context.i32_type(); - match ret.get_type().get_bit_width().cmp(&32) { - std::cmp::Ordering::Greater => self - .builder - .build_int_truncate(ret, i32_ty, name) - .map_err(|e| CodeGenError::LLVMError(e.to_string())), - std::cmp::Ordering::Less => self - .builder - .build_int_s_extend(ret, i32_ty, name) - .map_err(|e| CodeGenError::LLVMError(e.to_string())), - std::cmp::Ordering::Equal => Ok(ret), - } - } - - fn complex_arg_from_dwarf_read_plan( - &mut self, - plan: ghostscope_dwarf::VariableReadPlan, - display_name: Option, - ) -> Result> { - let pc_address = self.get_compile_time_context()?.pc_address; - let materialized = self.variable_read_plan_to_materialization(plan, pc_address)?; - let display_name = display_name.unwrap_or_else(|| materialized.name.clone()); - - match &materialized.materialization { - ghostscope_dwarf::VariableMaterialization::Unavailable { - availability: ghostscope_dwarf::Availability::OptimizedOut, - } => { - let optimized_type = ghostscope_dwarf::TypeInfo::OptimizedOut { - name: materialized.name.clone(), - 
}; - Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(optimized_type), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }) - } - ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { - Err(Self::dwarf_expression_unavailable_error( - &materialized.name, - availability, - pc_address, - )) - } - ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { - let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { - CodeGenError::DwarfError( - "Expression has no DWARF type information".to_string(), - ) - })?; - let data_len = Self::compute_read_size_for_type(&dwarf_type); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(display_name)); - } - let module_hint = - Self::module_path_for_offsets(materialized.module_path.as_deref()); - Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - address: address.clone(), - dwarf_type, - module_for_offsets: module_hint, - }, - }) - } - ghostscope_dwarf::VariableMaterialization::DirectValue { .. 
} => { - let value = - self.variable_materialization_to_llvm_value(&materialized, pc_address, None)?; - let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { - CodeGenError::DwarfError( - "Expression has no DWARF type information".to_string(), - ) - })?; - let value = match value { - BasicValueEnum::IntValue(value) => value, - BasicValueEnum::PointerValue(value) => self - .builder - .build_ptr_to_int(value, self.context.i64_type(), "direct_ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string()))?, - _ => { - return Err(CodeGenError::DwarfError(format!( - "direct DWARF value '{}' did not lower to an integer", - materialized.name - ))) - } - }; - let data_len = Self::compute_read_size_for_type(&dwarf_type).clamp(1, 8); - Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::ComputedInt { value, byte_len: data_len }, - }) - } - ghostscope_dwarf::VariableMaterialization::Composite { .. } => Err( - CodeGenError::DwarfError(format!( - "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", - materialized.name - )), - ), - } - } - - /// Unified expression resolver: returns a ComplexArg carrying - /// a consistent var_name_index/type_index/access_path/data_len/source - /// with strict priority: script variables -> DWARF (locals/params/globals). 
- fn resolve_expr_to_arg(&mut self, expr: &crate::script::ast::Expr) -> Result> { - use crate::script::ast::Expr as E; - match expr { - // 0) Alias variables: resolve to address and render as pointer value - E::Variable(name) if self.alias_variable_exists(name) => { - let aliased = self.get_alias_variable(name).expect("alias exists"); - let addr_i64 = self.resolve_ptr_i64_from_expr(&aliased)?; - let var_name_index = self.trace_context.add_variable_name(name.clone()); - Ok(ComplexArg { - var_name_index, - type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ComputedInt { - value: addr_i64, - byte_len: 8, - }, - }) - } - // 1) Script variables first - E::Variable(name) if self.variable_exists(name) => { - let val = self.load_variable(name)?; - let var_name_index = self.trace_context.add_variable_name(name.clone()); - // If this is a string variable, print its contents instead of address - if self - .get_variable_type(name) - .is_some_and(|t| matches!(t, crate::script::VarType::String)) - { - let bytes_opt = self.get_string_variable_bytes(name).cloned(); - if let Some(bytes) = bytes_opt { - // Build a char[] type with length=bytes.len() - let char_type = ghostscope_dwarf::TypeInfo::BaseType { - name: "char".to_string(), - size: 1, - encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16, - }; - let array_type = ghostscope_dwarf::TypeInfo::ArrayType { - element_type: Box::new(char_type), - element_count: Some(bytes.len() as u64), - total_size: Some(bytes.len() as u64), - }; - return Ok(ComplexArg { - var_name_index, - type_index: self.trace_context.add_type(array_type), - access_path: Vec::new(), - data_len: bytes.len(), - source: ComplexArgSource::ImmediateBytes { bytes }, - }); - } - } - match val { - BasicValueEnum::IntValue(iv) => { - // Preserve signedness for display: map bit width to I8/I16/I32/I64 - let bitw = iv.get_type().get_bit_width(); - let (kind, 
byte_len) = if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - }; - Ok(ComplexArg { - var_name_index, - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len, - }, - }) - } - BasicValueEnum::PointerValue(pv) => { - // Non-string pointer variable: print as address (hex) - let iv = self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string()))?; - Ok(ComplexArg { - var_name_index, - type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len: 8, - }, - }) - } - _ => Err(CodeGenError::TypeError( - "Unsupported script variable type for print".to_string(), - )), - } - } - - // 2) String literal -> Immediate bytes (for formatted args) - E::String(s) => { - let mut bytes = s.as_bytes().to_vec(); - bytes.push(0); - let char_type = ghostscope_dwarf::TypeInfo::BaseType { - name: "char".to_string(), - size: 1, - encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16, - }; - let array_type = ghostscope_dwarf::TypeInfo::ArrayType { - element_type: Box::new(char_type), - element_count: Some(bytes.len() as u64), - total_size: Some(bytes.len() as u64), - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name("__str_literal".to_string()), - type_index: self.trace_context.add_type(array_type), - access_path: Vec::new(), - data_len: bytes.len(), - source: ComplexArgSource::ImmediateBytes { bytes }, - }) - } - - // 3) Integer literal -> Immediate i64 bytes - E::Int(v) => { - let mut bytes = Vec::with_capacity(8); - bytes.extend_from_slice(&(*v).to_le_bytes()); - let int_type = 
ghostscope_dwarf::TypeInfo::BaseType { - name: "i64".to_string(), - size: 8, - encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name("__int_literal".to_string()), - type_index: self.trace_context.add_type(int_type), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ImmediateBytes { bytes }, - }) - } - - // 4) AddressOf: return AddressValue (pointer payload will be produced) - E::AddressOf(inner) => { - let var = self - .query_dwarf_for_complex_expr(inner)? - .ok_or_else(|| CodeGenError::VariableNotFound(format!("{inner:?}")))?; - let pc_address = self.get_compile_time_context()?.pc_address; - let materialized = self.variable_read_plan_to_materialization(var, pc_address)?; - let inner_ty = materialized.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })?; - let ptr_ty = ghostscope_dwarf::TypeInfo::PointerType { - target_type: Box::new(inner_ty.clone()), - size: 8, - }; - let address = match materialized.materialization { - ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { - address - } - ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { - return Err(Self::dwarf_expression_unavailable_error( - &materialized.name, - &availability, - pc_address, - )) - } - _ => { - return Err(CodeGenError::DwarfError(format!( - "cannot take address of value-backed DWARF expression '{}'", - materialized.name - ))) - } - }; - let module_hint = - Self::module_path_for_offsets(materialized.module_path.as_deref()); - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.trace_context.add_type(ptr_ty), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::AddressValue { - address, - module_for_offsets: module_hint, - }, - }) - } - - // 5) Complex lvalue shapes -> DWARF runtime 
read - expr @ (E::MemberAccess(_, _) - | E::ArrayAccess(_, _) - | E::PointerDeref(_) - | E::ChainAccess(_)) => { - if let E::ArrayAccess(array_expr, index_expr) = expr { - if let Some((BasicValueEnum::IntValue(value), _element_type)) = - self.compile_dynamic_array_access_value(array_expr, index_expr)? - { - let bitw = value.get_type().get_bit_width(); - let (kind, byte_len) = if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - }; - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { value, byte_len }, - }); - } - } - if let E::MemberAccess(obj_expr, field) = expr { - if let Some((BasicValueEnum::IntValue(value), _member_type)) = - self.compile_dynamic_member_access_value(obj_expr, field)? - { - let bitw = value.get_type().get_bit_width(); - let (kind, byte_len) = if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - }; - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { value, byte_len }, - }); - } - } - - let plan = self - .query_dwarf_for_complex_expr_plan(expr)? 
- .ok_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))?; - let display_name = if matches!(expr, E::PointerDeref(_)) { - Some(self.expr_to_name(expr)) - } else { - None - }; - self.complex_arg_from_dwarf_read_plan(plan, display_name) - } - - // 6) Variable not in script scope → DWARF variable or computed fast-path for simple scalars - E::Variable(name) => { - if let Some(v) = self.query_dwarf_for_variable(name)? { - self.complex_arg_from_dwarf_read_plan(v, None) - } else { - Err(CodeGenError::VariableNotInScope(name.clone())) - } - } - - // 7) Pointer arithmetic (ptr +/- K) → typed runtime read at computed address - E::BinaryOp { .. } => { - // Support: ptr + int, int + ptr, ptr - int (int may be negative) - // Only allow when ptr side resolves to DWARF pointer/array; the offset must be an integer literal for now. - // We emit a RuntimeRead with computed location, preserving the pointed-to DWARF type. - let pointer_arithmetic = self.pointer_arithmetic_parts_expanding_aliases(expr)?; - - // Try DWARF resolution for the pointer side - if let Some((ptr_side, index)) = pointer_arithmetic { - if let Some(var) = self.query_dwarf_for_complex_expr(&ptr_side)? 
{ - if var - .dwarf_type - .as_ref() - .is_some_and(ghostscope_dwarf::is_c_pointer_or_array_type) - { - let pointed_plan = var - .plan_pointer_element_index(index) - .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; - let pc_address = self.get_compile_time_context()?.pc_address; - let materialized = self - .variable_read_plan_to_materialization(pointed_plan, pc_address)?; - let elem_ty = materialized.dwarf_type.clone().ok_or_else(|| { - CodeGenError::DwarfError( - "Expression has no DWARF type information".to_string(), - ) - })?; - let address = - match materialized.materialization { - ghostscope_dwarf::VariableMaterialization::UserMemoryRead { - address, - } => address, - ghostscope_dwarf::VariableMaterialization::Unavailable { - availability, - } => { - return Err(Self::dwarf_expression_unavailable_error( - &materialized.name, - &availability, - pc_address, - )) - } - _ => return Err(CodeGenError::DwarfError( - "pointer arithmetic did not produce an address-backed plan" - .to_string(), - )), - }; - let data_len = Self::compute_read_size_for_type(&elem_ty); - let module_hint = - Self::module_path_for_offsets(materialized.module_path.as_deref()); - if data_len == 0 { - // Fallback for unsized/void targets: print computed address as pointer - let ptr_ti = ghostscope_dwarf::TypeInfo::PointerType { - target_type: Box::new(elem_ty.clone()), - size: 8, - }; - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.trace_context.add_type(ptr_ti), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::AddressValue { - address, - module_for_offsets: module_hint, - }, - }); - } - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.trace_context.add_type(elem_ty.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - address, - dwarf_type: elem_ty, - 
module_for_offsets: module_hint, - }, - }); - } - } - } - - // If pointer side cannot be resolved as DWARF pointer/array, fall back to computed int - let compiled = self.compile_expr(expr)?; - if let BasicValueEnum::IntValue(iv) = compiled { - let bitw = iv.get_type().get_bit_width(); - let (kind, byte_len) = if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len, - }, - }) - } else { - Err(CodeGenError::TypeError( - "Non-integer expression not supported in print".to_string(), - )) - } - } - - // Binary and other rvalue expressions → compile to computed int - other => { - let compiled = self.compile_expr(other)?; - if let BasicValueEnum::IntValue(iv) = compiled { - let bitw = iv.get_type().get_bit_width(); - let (kind, byte_len) = if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(other)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len, - }, - }) - } else { - Err(CodeGenError::TypeError( - "Non-integer expression not supported in print".to_string(), - )) - } - } - } - } - - /// Emit a single PrintComplexVariable or a single-arg PrintComplexFormat depending on the arg source. 
- fn emit_print_from_arg(&mut self, arg: ComplexArg<'ctx>) -> Result { - match arg.source { - ComplexArgSource::ComputedInt { value, byte_len } => { - self.generate_print_complex_variable_computed( - arg.var_name_index, - arg.type_index, - byte_len, - value, - )?; - Ok(1) - } - ComplexArgSource::RuntimeRead { - address, - ref dwarf_type, - module_for_offsets, - } => { - let meta = PrintVarRuntimeMeta { - var_name_index: arg.var_name_index, - type_index: arg.type_index, - access_path: String::new(), - data_len_limit: arg.data_len, - }; - self.generate_print_complex_variable_runtime( - meta, - &address, - dwarf_type, - module_for_offsets.as_deref(), - )?; - Ok(1) - } - ComplexArgSource::AddressValue { .. } | ComplexArgSource::ImmediateBytes { .. } => { - // Use ComplexFormat with "{}" to render address/immediate nicely - let fmt_idx = self.trace_context.add_string("{}".to_string()); - self.generate_print_complex_format_instruction(fmt_idx, &[arg])?; - Ok(1) - } - ComplexArgSource::MemDump { .. } | ComplexArgSource::MemDumpDynamic { .. } => { - // Use ComplexFormat with "{}"; generate_print_complex_format_instruction handles MemDump - let fmt_idx = self.trace_context.add_string("{}".to_string()); - self.generate_print_complex_format_instruction(fmt_idx, &[arg])?; - Ok(1) - } - } - } - /// Generate PrintComplexVariable instruction that embeds a computed integer value (no runtime read) - /// This is used for `print expr;` where expr is an rvalue computed in eBPF. 
- fn generate_print_complex_variable_computed( - &mut self, - var_name_index: u16, - type_index: u16, - byte_len: usize, - value: IntValue<'ctx>, - ) -> Result<()> { - // Build sizes - let header_size = std::mem::size_of::(); - let data_struct_size = std::mem::size_of::(); - let access_path_len: usize = 0; // computed expr has no access path - let total_data_length = data_struct_size + access_path_len + byte_len; - let total_size = header_size + total_data_length; - - // Reserve space directly in the per-CPU accumulation buffer - let inst_buffer = self - .reserve_instruction_region_or_return_zero(total_size as u64)? - .into_value_after_runtime_returns(); - - // Write InstructionHeader.inst_type - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::PrintComplexVariable as u64, false); - self.builder - .build_store(inst_buffer, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - - // Write data_length (u16) at offset 1 - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int(1, false)], - "data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? 
- }; - let data_length_ptr_cast = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_length_ptr_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - self.builder - .build_store( - data_length_ptr_cast, - self.context - .i16_type() - .const_int(total_data_length as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Data pointer (after header) - let data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int(header_size as u64, false)], - "data_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data GEP: {e}")))? - }; - - // var_name_index (u16) - let var_name_index_val = self - .context - .i16_type() - .const_int(var_name_index as u64, false); - let var_name_index_off = - std::mem::offset_of!(PrintComplexVariableData, var_name_index) as u64; - let var_name_index_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(var_name_index_off, false)], - "var_name_index_ptr_i8", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) - })? 
- }; - let var_name_index_ptr_i16 = self - .builder - .build_pointer_cast( - var_name_index_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "var_name_index_ptr_i16", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) - })?; - self.builder - .build_store(var_name_index_ptr_i16, var_name_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; - - // type_index (u16) - let type_index_offset = std::mem::offset_of!(PrintComplexVariableData, type_index) as u64; - let type_index_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(type_index_offset, false)], - "type_index_ptr_i8", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) - })? - }; - let type_index_ptr = self - .builder - .build_pointer_cast( - type_index_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "type_index_ptr_i16", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; - let type_index_val = self.context.i16_type().const_int(type_index as u64, false); - self.builder - .build_store(type_index_ptr, type_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; - - // access_path_len (u8) = 0 - let access_path_len_off = - std::mem::offset_of!(PrintComplexVariableData, access_path_len) as u64; - let access_path_len_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self - .context - .i32_type() - .const_int(access_path_len_off, false)], - "access_path_len_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get access_path_len GEP: {e}")) - })? 
- }; - self.builder - .build_store(access_path_len_ptr, self.context.i8_type().const_zero()) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store access_path_len: {e}")) - })?; - - // status (u8) = 0 - let status_off = std::mem::offset_of!(PrintComplexVariableData, status) as u64; - let status_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(status_off, false)], - "status_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? - }; - self.builder - .build_store(status_ptr, self.context.i8_type().const_zero()) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; - - // data_len (u16) - let data_len_off = std::mem::offset_of!(PrintComplexVariableData, data_len) as u64; - let data_len_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(data_len_off, false)], - "data_len_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? - }; - let data_len_ptr_cast = self - .builder - .build_pointer_cast( - data_len_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_len_ptr_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; - self.builder - .build_store( - data_len_ptr_cast, - self.context.i16_type().const_int(byte_len as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; - - // variable data starts right after PrintComplexVariableData (no access path) - let var_data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self - .context - .i32_type() - .const_int(data_struct_size as u64, false)], - "var_data_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")))? 
- }; - - // Store computed integer value into payload according to byte_len - match byte_len { - 1 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw == 1 { - // Booleans must serialize as 0/1 - self.builder - .build_int_z_extend(value, self.context.i8_type(), "expr_zext_bool_i8") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw < 8 { - self.builder - .build_int_s_extend(value, self.context.i8_type(), "expr_sext_i8") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 8 { - self.builder - .build_int_truncate(value, self.context.i8_type(), "expr_trunc_i8") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - value - }; - self.builder - .build_store(var_data_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 2 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw < 16 { - self.builder - .build_int_s_extend(value, self.context.i16_type(), "expr_sext_i16") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 16 { - self.builder - .build_int_truncate(value, self.context.i16_type(), "expr_trunc_i16") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - value - }; - let i16_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i16_ptr_ty, "expr_i16_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 4 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw < 32 { - self.builder - .build_int_s_extend(value, self.context.i32_type(), "expr_sext_i32") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 32 { - self.builder - .build_int_truncate(value, self.context.i32_type(), "expr_trunc_i32") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- } else { - value - }; - let i32_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i32_ptr_ty, "expr_i32_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 8 => { - let v64 = if value.get_type().get_bit_width() < 64 { - self.builder - .build_int_s_extend(value, self.context.i64_type(), "expr_sext_i64") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - value - }; - let i64_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i64_ptr_ty, "expr_i64_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v64) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - n => { - // Fallback: write lowest n bytes little-endian - let v64 = if value.get_type().get_bit_width() < 64 { - self.builder - .build_int_s_extend(value, self.context.i64_type(), "expr_sext_fallback") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - value - }; - for i in 0..n { - let shift = self.context.i64_type().const_int((i * 8) as u64, false); - let shifted = self - .builder - .build_right_shift(v64, shift, false, &format!("expr_shr_{i}")) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let byte = self - .builder - .build_int_truncate( - shifted, - self.context.i8_type(), - &format!("expr_byte_{i}"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let byte_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[self.context.i32_type().const_int(i as u64, false)], - &format!("expr_byte_ptr_{i}"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- }; - self.builder - .build_store(byte_ptr, byte) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - } - } - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } - fn is_char_byte_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { - use ghostscope_dwarf::TypeInfo as TI; - match t { - TI::BaseType { size, encoding, .. } => { - *size == 1 - && (*encoding == ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16 - || *encoding == ghostscope_dwarf::constants::DW_ATE_signed_char.0 as u16 - || *encoding == ghostscope_dwarf::constants::DW_ATE_unsigned.0 as u16 - || *encoding == ghostscope_dwarf::constants::DW_ATE_signed.0 as u16) - } - TI::TypedefType { - underlying_type, .. - } - | TI::QualifiedType { - underlying_type, .. - } => Self::is_char_byte_typeinfo(underlying_type), - _ => false, - } - } - - /// Compute read size for a given DWARF type. - /// Keep strict behavior for general unsized arrays; only apply a bounded fallback for char[]. - fn compute_read_size_for_type(t: &ghostscope_dwarf::TypeInfo) -> usize { - use ghostscope_dwarf::TypeInfo as TI; - match t { - TI::ArrayType { - element_type, - element_count, - total_size, - } => { - // Prefer DWARF-provided total size - if let Some(ts) = total_size { - return *ts as usize; - } - // Fallback for arrays without total_size: need element_count * elem_size - let elem_size = element_type.size() as usize; - if elem_size == 0 { - return 0; - } - if let Some(cnt) = element_count { - return elem_size * (*cnt as usize); - } - // Some toolchains emit extern/definition pairs where char[] has no bound in DWARF. - // Keep other unsized arrays strict to avoid silently over-reading unknown layouts. - if Self::is_char_byte_typeinfo(element_type) { - return Self::UNKNOWN_CHAR_ARRAY_READ_FALLBACK; - } - 0 - } - TI::TypedefType { - underlying_type, .. - } - | TI::QualifiedType { - underlying_type, .. 
- } => Self::compute_read_size_for_type(underlying_type), - _ => t.size() as usize, - } - } - - fn unwrap_alias_candidate_dwarf_type( - mut t: &ghostscope_dwarf::TypeInfo, - ) -> &ghostscope_dwarf::TypeInfo { - while let ghostscope_dwarf::TypeInfo::TypedefType { - underlying_type, .. - } - | ghostscope_dwarf::TypeInfo::QualifiedType { - underlying_type, .. - } = t - { - t = underlying_type.as_ref(); - } - t - } - - fn is_aliasable_dwarf_type(t: &ghostscope_dwarf::TypeInfo) -> bool { - matches!( - Self::unwrap_alias_candidate_dwarf_type(t), - ghostscope_dwarf::TypeInfo::PointerType { .. } - | ghostscope_dwarf::TypeInfo::ArrayType { .. } - | ghostscope_dwarf::TypeInfo::StructType { .. } - | ghostscope_dwarf::TypeInfo::UnionType { .. } - ) - } - - fn expr_to_name(&self, expr: &crate::script::ast::Expr) -> String { - use crate::script::ast::Expr as E; - fn inner(e: &E) -> String { - match e { - E::Variable(s) => s.clone(), - E::MemberAccess(obj, field) => format!("{}.{field}", inner(obj)), - E::ArrayAccess(arr, idx) => format!("{}[{}]", inner(arr), inner(idx)), - E::PointerDeref(p) => format!("*{}", inner(p)), - E::AddressOf(p) => format!("&{}", inner(p)), - E::ChainAccess(v) => v.join("."), - E::Int(v) => v.to_string(), - E::String(s) => format!("\"{s}\""), - E::Float(v) => format!("{v}"), - E::UnaryNot(e1) => format!("!{}", inner(e1)), - E::Bool(v) => v.to_string(), - E::SpecialVar(s) => format!("${s}"), - E::BuiltinCall { name, args } => { - let arg_strs: Vec = args.iter().map(inner).collect(); - format!("{}({})", name, arg_strs.join(", ")) - } - E::BinaryOp { left, op, right } => { - let op_str = match op { - crate::script::ast::BinaryOp::Add => "+", - crate::script::ast::BinaryOp::Subtract => "-", - crate::script::ast::BinaryOp::Multiply => "*", - crate::script::ast::BinaryOp::Divide => "/", - crate::script::ast::BinaryOp::Equal => "==", - crate::script::ast::BinaryOp::NotEqual => "!=", - crate::script::ast::BinaryOp::LessThan => "<", - 
crate::script::ast::BinaryOp::LessEqual => "<=", - crate::script::ast::BinaryOp::GreaterThan => ">", - crate::script::ast::BinaryOp::GreaterEqual => ">=", - crate::script::ast::BinaryOp::LogicalAnd => "&&", - crate::script::ast::BinaryOp::LogicalOr => "||", - }; - format!("({}{}{})", inner(left), op_str, inner(right)) - } - } - } - let s_full = inner(expr); - const MAX_NAME: usize = 96; - if s_full.chars().count() > MAX_NAME { - // Keep space for ellipsis - let keep = MAX_NAME.saturating_sub(3); - let mut acc = String::with_capacity(MAX_NAME); - for (i, ch) in s_full.chars().enumerate() { - if i >= keep { - break; - } - acc.push(ch); - } - acc.push_str("..."); - acc - } else { - s_full - } - } - - fn expr_contains_builtin(expr: &crate::script::ast::Expr) -> bool { - use crate::script::ast::Expr as E; - - match expr { - E::BuiltinCall { .. } => true, - E::UnaryNot(inner) - | E::PointerDeref(inner) - | E::AddressOf(inner) - | E::MemberAccess(inner, _) => Self::expr_contains_builtin(inner), - E::ArrayAccess(base, index) => { - Self::expr_contains_builtin(base) || Self::expr_contains_builtin(index) - } - E::BinaryOp { left, right, .. 
} => { - Self::expr_contains_builtin(left) || Self::expr_contains_builtin(right) - } - E::Int(_) - | E::Float(_) - | E::String(_) - | E::Bool(_) - | E::Variable(_) - | E::ChainAccess(_) - | E::SpecialVar(_) => false, - } - } - - fn compile_print_expr_with_builtin_exprerror( - &mut self, - expr: &crate::script::ast::Expr, - compile: F, - ) -> Result - where - F: FnOnce(&mut Self) -> Result, - { - if !Self::expr_contains_builtin(expr) { - return compile(self); - } - - let prev_context_active = self.condition_context_active; - if prev_context_active { - return compile(self); - } - - let expr_index = self.trace_context.add_string(self.expr_to_name(expr)); - let entry_event_bytes = self.compile_time_event_bytes_upper_bound; - - self.reset_condition_error()?; - self.condition_context_active = true; - let compiled = compile(self); - self.condition_context_active = prev_context_active; - let compiled = compiled?; - - let current_function = self - .builder - .get_insert_block() - .ok_or_else(|| CodeGenError::LLVMError("No current basic block".to_string()))? 
- .get_parent() - .ok_or_else(|| CodeGenError::LLVMError("No parent function".to_string()))?; - let err_block = self - .context - .append_basic_block(current_function, "print_expr_err_block"); - let ok_block = self - .context - .append_basic_block(current_function, "print_expr_ok_block"); - let merge_block = self - .context - .append_basic_block(current_function, "print_expr_merge_block"); - let cond_err_pred = self.build_condition_error_predicate()?; - self.builder - .build_conditional_branch(cond_err_pred, err_block, ok_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to branch on print expr error: {e}")) - })?; - - self.builder.position_at_end(err_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes; - self.emit_current_condition_exprerror(expr_index, "print_expr")?; - let err_path_event_bytes = self.compile_time_event_bytes_upper_bound; - self.builder - .build_unconditional_branch(merge_block) - .map_err(|e| { - CodeGenError::LLVMError(format!( - "Failed to branch from print expr error block: {e}" - )) - })?; - - self.builder.position_at_end(ok_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes; - self.builder - .build_unconditional_branch(merge_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to branch from print expr ok block: {e}")) - })?; - - self.builder.position_at_end(merge_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes.max(err_path_event_bytes); - Ok(compiled) - } - - fn emit_current_condition_exprerror( - &mut self, - expr_index: u16, - name_prefix: &str, - ) -> Result<()> { - let cond_err_ptr = self.get_or_create_cond_error_global(); - let err_code = self - .builder - .build_load( - self.context.i8_type(), - cond_err_ptr, - &format!("{name_prefix}_err_code"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- .into_int_value(); - let cond_err_addr_ptr = self.get_or_create_cond_error_addr_global(); - let err_addr = self - .builder - .build_load( - self.context.i64_type(), - cond_err_addr_ptr, - &format!("{name_prefix}_err_addr"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - let cond_err_flags_ptr = self.get_or_create_cond_error_flags_global(); - let err_flags = self - .builder - .build_load( - self.context.i8_type(), - cond_err_flags_ptr, - &format!("{name_prefix}_err_flags"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - self.generate_expr_error(expr_index, err_code, err_flags, err_addr) - } - - /// Heuristic to decide if an expression should be bound as a DWARF alias variable. - /// Prefer shapes that resolve to a runtime address via DWARF or address-of: - /// - AddressOf(...) - /// - Member/Array/PointerDeref/Chain access - /// - Variable that is a DWARF-backed symbol (not a script var) - /// - Offset arithmetic on top of an aliasy expression: alias +/- integer expression - fn is_alias_candidate_expr(&mut self, expr: &crate::script::ast::Expr) -> bool { - use crate::script::ast::BinaryOp as BO; - use crate::script::ast::Expr as E; - match expr { - // Alias variable names are alias candidates - E::Variable(name) if self.alias_variable_exists(name) => true, - // Explicit address-of is always an alias - E::AddressOf(_) => true, - // Constant offset on top of an alias-eligible expression - E::BinaryOp { - left, - op: BO::Add, - right, - } => { - let left_is_alias = self.is_alias_candidate_expr(left); - let right_is_alias = self.is_alias_candidate_expr(right); - (left_is_alias && !right_is_alias) || (right_is_alias && !left_is_alias) - } - E::BinaryOp { - left, - op: BO::Subtract, - right, - } => self.is_alias_candidate_expr(left) && !self.is_alias_candidate_expr(right), - // Otherwise, only keep address-like or aggregate DWARF expressions as aliases. 
- // Scalar DWARF expressions should stay concrete so `let n = foo.len;` behaves - // like an integer script variable and remains usable in capture-length formatting. - other => self - .query_dwarf_for_complex_expr(other) - .ok() - .flatten() - .and_then(|var| var.dwarf_type) - .is_some_and(|ty| Self::is_aliasable_dwarf_type(&ty)), - } - } - - // removed old helpers (pure lvalue/binary_op detection) — unified resolver handles shapes - - /// Main entry point: compile program with staged transmission system - pub fn compile_program_with_staged_transmission( - &mut self, - program: &Program, - _variable_types: HashMap, - ) -> Result { - info!("Compiling program with staged transmission system"); - - // Step 1: Send TraceEventHeader - self.send_trace_event_header()?; - info!("Sent TraceEventHeader"); - - // Step 2: Send TraceEventMessage with dynamic trace_id - let trace_id = self.current_trace_id.map(|id| id as u64).unwrap_or(0); - self.send_trace_event_message(trace_id)?; - info!("Sent TraceEventMessage"); - - // Reset per-event execution status flags - self.store_flag_value("_gs_any_fail", 0)?; - self.store_flag_value("_gs_any_success", 0)?; - - // Step 3: Process each statement and generate LLVM IR on-demand - let mut instruction_count = 0u16; - for statement in &program.statements { - instruction_count += self.compile_statement(statement)?; - } - - // Step 4: Send EndInstruction to mark completion - self.send_end_instruction(instruction_count)?; - info!( - "Sent EndInstruction with {} total instructions", - instruction_count - ); - - // Step 5: Return the trace context for user-space parsing - Ok(self.trace_context.clone()) - } - - /// Compile a statement and return the number of instructions generated - pub fn compile_statement(&mut self, statement: &Statement) -> Result { - debug!("Compiling statement: {:?}", statement); - - match statement { - Statement::AliasDeclaration { name, target } => { - info!("Registering alias variable: {} = {:?}", name, target); - // 
Declare in current scope (no redeclaration or shadowing) - self.declare_name_in_current_scope(name)?; - self.set_alias_variable(name, target.clone()); - Ok(0) - } - Statement::VarDeclaration { name, value } => { - info!("Processing variable declaration: {} = {:?}", name, value); - // Declare in current scope (no redeclaration or shadowing) - self.declare_name_in_current_scope(name)?; - // Decide whether this is an alias binding (DWARF-backed address/reference) - if self.is_alias_candidate_expr(value) { - self.set_alias_variable(name, value.clone()); - tracing::debug!(var=%name, "Registered DWARF alias variable"); - Ok(0) - } else { - // Compile the value expression and store as concrete variable - // Special-case: string literal and string var copy — record bytes for content printing - match value { - crate::script::Expr::String(s) => { - let mut bytes = s.as_bytes().to_vec(); - bytes.push(0); // NUL terminate for display convenience - self.set_string_variable_bytes(name, bytes); - } - crate::script::Expr::Variable(ref nm) => { - if self - .get_variable_type(nm) - .is_some_and(|t| matches!(t, crate::script::VarType::String)) - { - if let Some(b) = self.get_string_variable_bytes(nm).cloned() { - self.set_string_variable_bytes(name, b); - } - } - } - _ => {} - } - let compiled_value = self.compile_expr(value)?; - // Disallow storing pointer values in script variables, except for string literals - if let BasicValueEnum::PointerValue(_) = compiled_value { - // Allow if RHS is a string literal OR a string variable (VarType::String) - let allow_string_var_copy = match value { - crate::script::Expr::String(_) => true, - crate::script::Expr::Variable(ref nm) => self - .get_variable_type(nm) - .is_some_and(|t| matches!(t, crate::script::VarType::String)), - _ => false, - }; - if !allow_string_var_copy { - return Err(CodeGenError::TypeError( - "script variables cannot store pointer values; use DWARF alias (let v = &expr) or keep it as a string".to_string(), - )); - } - } - 
self.store_variable(name, compiled_value)?; - Ok(0) // VarDeclaration doesn't generate instructions - } - } - Statement::Print(print_stmt) => self.compile_print_statement(print_stmt), - Statement::If { - condition, - then_body, - else_body, - } => { - let entry_event_bytes = self.compile_time_event_bytes_upper_bound; - // Prepare condition context (runtime error capture) - // Pretty expression text for warning - let expr_text = self.expr_to_name(condition); - let expr_index = self.trace_context.add_string(expr_text); - // Activate condition context (compile-time flag) and reset runtime error byte - self.condition_context_active = true; - self.reset_condition_error()?; - - // Compile condition expression - let cond_value = self.compile_expr(condition)?; - - // Convert condition to i1 (boolean) for branching - let cond_bool = match cond_value { - BasicValueEnum::IntValue(int_val) => { - // Convert integer to boolean (non-zero = true) - self.builder - .build_int_compare( - inkwell::IntPredicate::NE, - int_val, - int_val.get_type().const_zero(), - "cond_bool", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to create condition: {e}")) - })? - } - _ => { - return Err(CodeGenError::LLVMError( - "Condition must evaluate to integer".to_string(), - )); - } - }; - - // Get current function from builder - let current_function = self - .builder - .get_insert_block() - .ok_or_else(|| CodeGenError::LLVMError("No current basic block".to_string()))? 
- .get_parent() - .ok_or_else(|| CodeGenError::LLVMError("No parent function".to_string()))?; - - // Create basic blocks for error/noerror and then/else paths - let then_block = self - .context - .append_basic_block(current_function, "then_block"); - let else_block = self - .context - .append_basic_block(current_function, "else_block"); - let merge_block = self - .context - .append_basic_block(current_function, "merge_block"); - let err_block = self - .context - .append_basic_block(current_function, "cond_err_block"); - let ok_block = self - .context - .append_basic_block(current_function, "cond_ok_block"); - // After cond compiled, deactivate compile-time flag - self.condition_context_active = false; - - // First branch: did runtime errors occur while evaluating the condition? - let cond_err_pred = self.build_condition_error_predicate()?; - self.builder - .build_conditional_branch(cond_err_pred, err_block, ok_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to branch on cond_err: {e}")) - })?; - - // Error path: emit ExprError and decide destination - self.builder.position_at_end(err_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes; - self.emit_current_condition_exprerror(expr_index, "cond")?; - // Decide where to go on error: if else_body is If (else-if), go to else_block to continue; - // otherwise, skip else (suppress) and jump to merge. - let goto_else = matches!(else_body.as_deref(), Some(Statement::If { .. 
})); - let err_path_event_bytes = self.compile_time_event_bytes_upper_bound; - if goto_else { - self.builder - .build_unconditional_branch(else_block) - .map_err(|e| { - CodeGenError::LLVMError(format!( - "Failed to branch to else on error: {e}" - )) - })?; - } else { - self.builder - .build_unconditional_branch(merge_block) - .map_err(|e| { - CodeGenError::LLVMError(format!( - "Failed to branch to merge on error: {e}" - )) - })?; - } - - // No-error path: branch on boolean condition - self.builder.position_at_end(ok_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes; - self.builder - .build_conditional_branch(cond_bool, then_block, else_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to create branch: {e}")) - })?; - - // Build then block - self.builder.position_at_end(then_block); - self.compile_time_event_bytes_upper_bound = entry_event_bytes; - let mut then_instructions = 0u16; - self.enter_scope(); - for stmt in then_body { - then_instructions += self.compile_statement(stmt)?; - } - self.exit_scope(); - let then_event_bytes = self.compile_time_event_bytes_upper_bound; - self.builder - .build_unconditional_branch(merge_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to branch to merge: {e}")) - })?; - - // Build else block - self.builder.position_at_end(else_block); - let else_entry_event_bytes = if goto_else { - entry_event_bytes.max(err_path_event_bytes) - } else { - entry_event_bytes - }; - self.compile_time_event_bytes_upper_bound = else_entry_event_bytes; - let mut else_instructions = 0u16; - if let Some(else_stmt) = else_body { - self.enter_scope(); - else_instructions += self.compile_statement(else_stmt)?; - self.exit_scope(); - } - self.builder - .build_unconditional_branch(merge_block) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to branch to merge: {e}")) - })?; - let else_event_bytes = self.compile_time_event_bytes_upper_bound; - - // Continue with merge block - 
self.builder.position_at_end(merge_block); - self.compile_time_event_bytes_upper_bound = if goto_else { - then_event_bytes.max(else_event_bytes) - } else { - then_event_bytes - .max(else_event_bytes) - .max(err_path_event_bytes) - }; - - // Return the maximum instructions from either branch - Ok(std::cmp::max(then_instructions, else_instructions)) - } - Statement::Block(nested_statements) => { - let mut total_instructions = 0u16; - self.enter_scope(); - for stmt in nested_statements { - total_instructions += self.compile_statement(stmt)?; - } - self.exit_scope(); - Ok(total_instructions) - } - Statement::TracePoint { pattern: _, body } => { - let mut total_instructions = 0u16; - // Start a new scope for the trace body - self.enter_scope(); - for stmt in body { - total_instructions += self.compile_statement(stmt)?; - } - self.exit_scope(); - Ok(total_instructions) - } - _ => { - warn!("Unsupported statement type: {:?}", statement); - Ok(0) - } - } - } - - /// Compile print statement and generate LLVM IR on-demand - pub fn compile_print_statement(&mut self, print_stmt: &PrintStatement) -> Result { - info!("Compiling print statement: {:?}", print_stmt); - - match print_stmt { - PrintStatement::String(s) => { - info!("Processing string literal: {}", s); - // 1. Add string to TraceContext - let string_index = self.trace_context.add_string(s.to_string()); - // 2. 
Generate eBPF code for PrintStringIndex - self.generate_print_string_index(string_index)?; - Ok(1) // Generated 1 instruction - } - PrintStatement::Variable(var_name) => { - info!("Processing variable: {}", var_name); - let expr = crate::script::Expr::Variable(var_name.clone()); - let arg = self.resolve_expr_to_arg(&expr)?; - let n = self.emit_print_from_arg(arg)?; - tracing::trace!( - var_name = %var_name, - instructions = n, - "compile_print_statement: emitted via unified resolver" - ); - Ok(n) - } - PrintStatement::ComplexVariable(expr) => { - info!("Processing complex variable: {:?}", expr); - let arg = self.compile_print_expr_with_builtin_exprerror(expr, |ctx| { - ctx.resolve_expr_to_arg(expr) - })?; - let n = self.emit_print_from_arg(arg)?; - tracing::trace!( - instructions = n, - "compile_print_statement: emitted via unified resolver" - ); - Ok(n) - } - PrintStatement::Formatted { format, args } => { - info!( - "Processing formatted print: '{}' with {} args", - format, - args.len() - ); - self.compile_formatted_print(format, args) - } - } - } - - /// Compile formatted print statement: collect all variable data and send as PrintComplexFormat instruction - fn resolve_memory_format_address( - &mut self, - expr: &crate::script::ast::Expr, - ) -> Result> { - if let Ok(addr) = self.resolve_runtime_address_from_expr(expr) { - return Ok(addr); - } - - let dwarf_error = match self.query_dwarf_for_complex_expr(expr) { - Ok(Some(var)) => { - let pc_address = self.get_compile_time_context()?.pc_address; - return self.variable_read_plan_to_runtime_address(&var, pc_address, None); - } - Ok(None) => None, - Err(err) => { - tracing::debug!( - error = %err, - "DWARF address resolution unavailable for memory format expression; trying script value fallback" - ); - Some(err) - } - }; - - match self.compile_expr(expr)? 
{ - BasicValueEnum::PointerValue(pv) => self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map(|value| RuntimeAddress::available(value, self.context)) - .map_err(|e| CodeGenError::Builder(e.to_string())), - _ => { - Err(dwarf_error - .unwrap_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))) - } - } - } - - fn compile_formatted_print( - &mut self, - format: &str, - args: &[crate::script::ast::Expr], - ) -> Result { - info!( - "Compiling formatted print: '{}' with {} arguments", - format, - args.len() - ); - let format_string_index = self.trace_context.add_string(format.to_string()); - let mut complex_args: Vec> = Vec::with_capacity(args.len()); - - // Parse placeholders from the format string to support extended specifiers - #[derive(Clone, Copy, Debug, PartialEq)] - enum Conv { - Default, - HexLower, - HexUpper, - Ptr, - Ascii, - } - #[derive(Clone, Debug, PartialEq)] - enum LenSpec { - None, - Static(usize), - Star, - Capture(String), - } - - fn parse_static_len(spec: &str) -> Option { - if spec.chars().all(|c| c.is_ascii_digit()) { - return spec.parse::().ok(); - } - if let Some(hex) = spec.strip_prefix("0x") { - if !hex.is_empty() && hex.chars().all(|c| c.is_ascii_hexdigit()) { - return usize::from_str_radix(hex, 16).ok(); - } - } - if let Some(oct) = spec.strip_prefix("0o") { - if !oct.is_empty() && oct.chars().all(|c| matches!(c, '0'..='7')) { - return usize::from_str_radix(oct, 8).ok(); - } - } - if let Some(bin) = spec.strip_prefix("0b") { - if !bin.is_empty() && bin.chars().all(|c| matches!(c, '0' | '1')) { - return usize::from_str_radix(bin, 2).ok(); - } - } - None - } - - fn parse_slots(fmt: &str) -> Vec<(Conv, LenSpec)> { - let mut res = Vec::new(); - let mut it = fmt.chars().peekable(); - while let Some(ch) = it.next() { - if ch == '{' { - if it.peek() == Some(&'{') { - it.next(); - continue; - } - let mut content = String::new(); - for c in it.by_ref() { - if c == '}' { - break; - } - content.push(c); - } - 
if content.is_empty() { - res.push((Conv::Default, LenSpec::None)); - } else if let Some(rest) = content.strip_prefix(':') { - let mut sit = rest.chars(); - let conv = match sit.next().unwrap_or(' ') { - 'x' => Conv::HexLower, - 'X' => Conv::HexUpper, - 'p' => Conv::Ptr, - 's' => Conv::Ascii, - _ => Conv::Default, - }; - let rest: String = sit.collect(); - let lens = if rest.is_empty() { - LenSpec::None - } else if let Some(r) = rest.strip_prefix('.') { - if r == "*" { - LenSpec::Star - } else if let Some(s) = r.strip_suffix('$') { - LenSpec::Capture(s.to_string()) - } else if let Some(n) = parse_static_len(r) { - LenSpec::Static(n) - } else { - LenSpec::None - } - } else { - LenSpec::None - }; - res.push((conv, lens)); - } else { - res.push((Conv::Default, LenSpec::None)); - } - } - } - res - } - - let slots = parse_slots(format); - let mut ai = 0usize; // arg cursor - for (conv, lens) in slots.into_iter() { - match conv { - Conv::Default => { - if ai >= args.len() { - break; - } - let expr = &args[ai]; - let a = self.compile_print_expr_with_builtin_exprerror(expr, |ctx| { - ctx.resolve_expr_to_arg(expr) - })?; - complex_args.push(a); - ai += 1; - } - Conv::Ptr => { - if ai >= args.len() { - break; - } - // Force pointer address payload (u64) regardless of DWARF shape - let expr = &args[ai]; - // Try compile to IntValue or PointerValue - let val = self.compile_expr(expr)?; - let iv = match val { - BasicValueEnum::IntValue(iv) => iv, - BasicValueEnum::PointerValue(pv) => self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string()))?, - _ => self - .compile_dwarf_expression(expr) - .and_then(|bv| match bv { - BasicValueEnum::IntValue(iv) => Ok(iv), - BasicValueEnum::PointerValue(pv) => self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string())), - _ => Err(CodeGenError::TypeError("pointer expected".into())), - })?, - }; - 
complex_args.push(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len: 8, - }, - }); - ai += 1; - } - Conv::HexLower | Conv::HexUpper | Conv::Ascii => { - // Memory dump; handle static length at compile time. Other cases use default read and let user space trim. - // Handle star: consume length arg (as computed int) then value arg - let wants_ascii = matches!(conv, Conv::Ascii); - match lens { - LenSpec::Static(n) if ai < args.len() => { - // Resolve value expr address - let expr = &args[ai]; - let addr_iv = self.resolve_memory_format_address(expr)?; - complex_args.push(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self - .trace_context - .add_type(ghostscope_dwarf::TypeInfo::ArrayType { - element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { - name: "u8".into(), - size: 1, - encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char - .0 - as u16, - }), - element_count: Some(n as u64), - total_size: Some(n as u64), - }), - access_path: Vec::new(), - data_len: n, - source: ComplexArgSource::MemDump { - address: addr_iv, - len: n, - }, - }); - ai += 1; - } - LenSpec::Star => { - // Dynamic length: consume length arg, then create a dynamic mem-dump for value - if ai + 1 >= args.len() { - break; - } - // length argument - let len_expr = &args[ai]; - let len_val = self.compile_expr(len_expr)?; - let (len_iv, byte_len) = match len_val { - BasicValueEnum::IntValue(iv) => (iv, 8usize), - _ => { - return Err(CodeGenError::TypeError( - "length must be integer".into(), - )) - } - }; - complex_args.push(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name("__len".into()), - type_index: self.add_synthesized_type_index_for_kind(TypeKind::U64), - 
access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: len_iv, - byte_len, - }, - }); - - // value expression -> dynamic memdump with cap - let val_expr = &args[ai + 1]; - let addr_iv = self.resolve_memory_format_address(val_expr)?; - // Reserve up to configured per-arg cap for dynamic slices - let cap = self.compile_options.mem_dump_cap as usize; - complex_args.push(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(val_expr)), - type_index: self - .trace_context - .add_type(ghostscope_dwarf::TypeInfo::ArrayType { - element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { - name: "u8".into(), - size: 1, - encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char - .0 - as u16, - }), - element_count: Some(cap as u64), - total_size: Some(cap as u64), - }), - access_path: Vec::new(), - data_len: cap, - source: ComplexArgSource::MemDumpDynamic { - address: addr_iv, - len_value: len_iv, - max_len: cap, - }, - }); - ai += 2; - } - LenSpec::Capture(name) => { - // Use script variable `name` as length; emit a length argument + a dynamic mem-dump argument - if ai >= args.len() { - break; - } - if !self.variable_exists(&name) { - return Err(CodeGenError::TypeError(format!( - "capture length variable '{name}' not found" - ))); - } - // length as computed int - let len_val = self.load_variable(&name)?; - let (len_iv, byte_len) = match len_val { - BasicValueEnum::IntValue(iv) => (iv, 8usize), - BasicValueEnum::PointerValue(pv) => ( - self.builder - .build_ptr_to_int( - pv, - self.context.i64_type(), - "len_ptr_to_i64", - ) - .map_err(|e| CodeGenError::Builder(e.to_string()))?, - 8usize, - ), - _ => { - return Err(CodeGenError::TypeError( - "length must be integer/pointer".into(), - )) - } - }; - complex_args.push(ComplexArg { - var_name_index: self.trace_context.add_variable_name(name.clone()), - type_index: self.add_synthesized_type_index_for_kind(TypeKind::U64), - access_path: 
Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: len_iv, - byte_len, - }, - }); - - // value - let val_expr = &args[ai]; - let addr_iv = self.resolve_memory_format_address(val_expr)?; - let cap = self.compile_options.mem_dump_cap as usize; - complex_args.push(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(val_expr)), - type_index: self - .trace_context - .add_type(ghostscope_dwarf::TypeInfo::ArrayType { - element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { - name: "u8".into(), - size: 1, - encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char - .0 - as u16, - }), - element_count: Some(cap as u64), - total_size: Some(cap as u64), - }), - access_path: Vec::new(), - data_len: cap, - source: ComplexArgSource::MemDumpDynamic { - address: addr_iv, - len_value: len_iv, - max_len: cap, - }, - }); - ai += 1; - } - _ => { - // None: resolve value directly - if ai >= args.len() { - break; - } - complex_args.push(self.resolve_expr_to_arg(&args[ai])?); - ai += 1; - } - } - let _ = wants_ascii; // reserved for future per-arg metadata - } - } - } - self.generate_print_complex_format_instruction(format_string_index, &complex_args)?; - Ok(1) - } - - /// Resolve variable with correct priority: script variables first, then DWARF variables - /// This method is copied from protocol.rs to maintain functionality - pub fn resolve_variable_with_priority(&mut self, var_name: &str) -> Result<(u16, TypeKind)> { - info!("Resolving variable '{}' with correct priority", var_name); - - // Step 1: Check if it's a script-defined variable first - if self.variable_exists(var_name) { - info!("Found script variable: {}", var_name); - - // Get the variable's LLVM value to infer type - let loaded_value = self.load_variable(var_name)?; - let type_encoding = self.infer_type_from_llvm_value(&loaded_value); - - // Add to TraceContext - let var_name_index = 
self.trace_context.add_variable_name(var_name.to_string()); - - return Ok((var_name_index, type_encoding)); - } - - // Step 2: If not found in script variables, try DWARF variables - info!( - "Variable '{}' not found in script variables, checking DWARF", - var_name - ); - - let compile_context = self.get_compile_time_context()?.clone(); - let read_plan = match self.query_dwarf_for_variable(var_name)? { - Some(var) => var, - None => { - return Err(CodeGenError::VariableNotFound(format!( - "Variable '{}' not found in script or DWARF at PC 0x{:x} in module '{}'", - var_name, compile_context.pc_address, compile_context.module_path - ))); - } - }; - - // Convert DWARF type information to TypeKind using existing method - let dwarf_type = read_plan.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError("Variable has no DWARF type information".to_string()) - })?; - let type_encoding = TypeKind::from(dwarf_type); - - // Add to StringTable - let var_name_index = self.trace_context.add_variable_name(var_name.to_string()); - - info!( - "DWARF variable '{}' resolved successfully with type: {:?}", - var_name, type_encoding - ); - - Ok((var_name_index, type_encoding)) - } - - /// Synthesize a DWARF-like TypeInfo for a basic TypeKind (for script variables) - fn synthesize_typeinfo_for_typekind(&self, kind: TypeKind) -> ghostscope_dwarf::TypeInfo { - use ghostscope_dwarf::constants::{ - DW_ATE_boolean, DW_ATE_float, DW_ATE_signed, DW_ATE_signed_char, DW_ATE_unsigned, - }; - use ghostscope_dwarf::TypeInfo as TI; - - match kind { - TypeKind::Bool => TI::BaseType { - name: "bool".to_string(), - size: 1, - encoding: DW_ATE_boolean.0 as u16, - }, - TypeKind::F32 => TI::BaseType { - name: "f32".to_string(), - size: 4, - encoding: DW_ATE_float.0 as u16, - }, - TypeKind::F64 => TI::BaseType { - name: "f64".to_string(), - size: 8, - encoding: DW_ATE_float.0 as u16, - }, - TypeKind::I8 => TI::BaseType { - name: "i8".to_string(), - size: 1, - encoding: DW_ATE_signed_char.0 as u16, - 
}, - TypeKind::I16 => TI::BaseType { - name: "i16".to_string(), - size: 2, - encoding: DW_ATE_signed.0 as u16, - }, - TypeKind::I32 => TI::BaseType { - name: "i32".to_string(), - size: 4, - encoding: DW_ATE_signed.0 as u16, - }, - TypeKind::I64 => TI::BaseType { - name: "i64".to_string(), - size: 8, - encoding: DW_ATE_signed.0 as u16, - }, - TypeKind::U8 | TypeKind::Char => TI::BaseType { - name: "u8".to_string(), - size: 1, - encoding: DW_ATE_unsigned.0 as u16, - }, - TypeKind::U16 => TI::BaseType { - name: "u16".to_string(), - size: 2, - encoding: DW_ATE_unsigned.0 as u16, - }, - TypeKind::U32 => TI::BaseType { - name: "u32".to_string(), - size: 4, - encoding: DW_ATE_unsigned.0 as u16, - }, - TypeKind::U64 => TI::BaseType { - name: "u64".to_string(), - size: 8, - encoding: DW_ATE_unsigned.0 as u16, - }, - TypeKind::Pointer | TypeKind::CString | TypeKind::String | TypeKind::Unknown => { - // Use void* as a reasonable default for pointers/strings in script land - TI::PointerType { - target_type: Box::new(TI::UnknownType { - name: "void".to_string(), - }), - size: 8, - } - } - TypeKind::NullPointer => TI::PointerType { - target_type: Box::new(TI::UnknownType { - name: "void".to_string(), - }), - size: 8, - }, - _ => TI::BaseType { - name: "i64".to_string(), - size: 8, - encoding: DW_ATE_signed.0 as u16, - }, - } - } - - fn add_synthesized_type_index_for_kind(&mut self, kind: TypeKind) -> u16 { - let ti = self.synthesize_typeinfo_for_typekind(kind); - self.trace_context.add_type(ti) - } - - /// Infer TypeKind from LLVM value type - /// Copied from protocol.rs - fn infer_type_from_llvm_value(&self, value: &BasicValueEnum<'_>) -> TypeKind { - match value { - BasicValueEnum::IntValue(int_val) => { - match int_val.get_type().get_bit_width() { - 1 => TypeKind::Bool, - 8 => TypeKind::I8, // Default to signed for script variables - 16 => TypeKind::I16, - 32 => TypeKind::I32, - 64 => TypeKind::I64, - _ => TypeKind::I64, // Default fallback - } - } - 
BasicValueEnum::FloatValue(float_val) => { - match float_val.get_type() { - t if t == self.context.f32_type() => TypeKind::F32, - t if t == self.context.f64_type() => TypeKind::F64, - _ => TypeKind::F64, // Default fallback - } - } - BasicValueEnum::PointerValue(_) => TypeKind::Pointer, - _ => TypeKind::I64, // Conservative default - } - } - - /// Generate eBPF code for PrintComplexFormat instruction with runtime reads for variables - fn generate_print_complex_format_instruction( - &mut self, - format_string_index: u16, - complex_args: &[ComplexArg<'ctx>], - ) -> Result<()> { - use InstructionType::PrintComplexFormat as IT; - - // Keep a single formatted print within the remaining event budget on the current - // control-flow path, while still leaving room for EndInstruction. - let instruction_budget = print_complex_format_instruction_budget( - self.compile_options.max_trace_event_size as usize, - self.compile_time_event_bytes_upper_bound, - ); - let fixed_overhead = std::mem::size_of::() - + std::mem::size_of::(); - - // First pass: accumulate header bytes and static payload, record dynamic args - let mut arg_count = 0u8; - let mut headers_total = 0usize; - let mut static_payload_total = 0usize; - let mut dynamic_max_lens: Vec = Vec::new(); - let mut header_lens: Vec = Vec::with_capacity(complex_args.len()); - for a in complex_args { - // Header bytes per-arg: var_name_index(2) + type_index(2) + access_path_len(1) + status(1) + data_len(2) + access_path - let header_len = 2 + 2 + 1 + 1 + 2 + a.access_path.len(); - header_lens.push(header_len); - headers_total += header_len; - - match &a.source { - ComplexArgSource::ImmediateBytes { bytes } => static_payload_total += bytes.len(), - ComplexArgSource::AddressValue { .. } => static_payload_total += 8, - ComplexArgSource::RuntimeRead { .. } => { - static_payload_total += - std::cmp::max(a.data_len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) - } - ComplexArgSource::ComputedInt { byte_len, .. 
} => static_payload_total += *byte_len, - ComplexArgSource::MemDump { len, .. } => { - static_payload_total += std::cmp::max(*len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) - } - ComplexArgSource::MemDumpDynamic { max_len, .. } => dynamic_max_lens.push(*max_len), - } - arg_count = arg_count.saturating_add(1); - } - - // Static payload keeps its existing layout; dynamic payload shares the remaining - // instruction budget fairly so later {:s.*}/{:x.*} arguments do not get starved. - let remaining_for_payload = instruction_budget - .saturating_sub(fixed_overhead) - .saturating_sub(headers_total) - .saturating_sub(static_payload_total); - let dynamic_reservations = - allocate_dynamic_payload_reservations(&dynamic_max_lens, remaining_for_payload); - let mut dynamic_reservations_iter = dynamic_reservations.into_iter(); - - // Second pass: decide effective reserved payload for each arg - // Default to computed static payload; dynamic args share the event-derived budget - let mut effective_reserved: Vec = Vec::with_capacity(complex_args.len()); - for a in complex_args { - let reserved = match &a.source { - ComplexArgSource::ImmediateBytes { bytes } => bytes.len(), - ComplexArgSource::AddressValue { .. } => 8, - ComplexArgSource::RuntimeRead { .. } => { - std::cmp::max(a.data_len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) - } - ComplexArgSource::ComputedInt { byte_len, .. } => *byte_len, - ComplexArgSource::MemDump { len, .. } => { - std::cmp::max(*len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) - } - ComplexArgSource::MemDumpDynamic { .. 
} => { - dynamic_reservations_iter.next().unwrap_or(0) - } - }; - effective_reserved.push(reserved); - } - - // Now compute final inst_data_size using effective reservations - let total_args_payload: usize = - header_lens.iter().sum::() + effective_reserved.iter().sum::(); - let inst_data_size = std::mem::size_of::() + total_args_payload; - let total_size = std::mem::size_of::() + inst_data_size; - - // Reserve buffer directly in accumulation buffer to avoid extra copy - let buffer = self - .reserve_instruction_region_or_return_zero(total_size as u64)? - .into_value_after_runtime_returns(); - - // Avoid memset; global buffer is zero-initialized - - // Write InstructionHeader - let inst_type_val = self.context.i8_type().const_int(IT as u8 as u64, false); - self.builder - .build_store(buffer, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - // data_length at +1 - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - buffer, - &[self.context.i32_type().const_int(1, false)], - "data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? 
- }; - let data_length_i16_ptr = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_length_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - let data_length_val = self - .context - .i16_type() - .const_int(inst_data_size as u64, false); - self.builder - .build_store(data_length_i16_ptr, data_length_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Write PrintComplexFormatData at offset 4 - let data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - buffer, - &[self.context.i32_type().const_int(4, false)], - "pcf_data_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get pcf_data_ptr GEP: {e}")) - })? - }; - - // format_string_index (u16) at +0 - let fsi_ptr = self - .builder - .build_pointer_cast( - data_ptr, - self.context.ptr_type(AddressSpace::default()), - "fsi_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast fsi_ptr: {e}")))?; - let fsi_val = self - .context - .i16_type() - .const_int(format_string_index as u64, false); - self.builder - .build_store(fsi_ptr, fsi_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store fsi: {e}")))?; - // arg_count (u8) at +2 - let arg_cnt_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(2, false)], - "arg_count_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get arg_count GEP: {e}")))? 
- }; - self.builder - .build_store( - arg_cnt_ptr, - self.context.i8_type().const_int(arg_count as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store arg_count: {e}")))?; - - // Start of variable payload after PrintComplexFormatData — use computed effective reservations - let mut offset = std::mem::size_of::(); - for (arg_index, a) in complex_args.iter().enumerate() { - // Per-arg reserved payload length - let reserved_len = effective_reserved[arg_index]; - - // Base pointer = data_ptr + offset - let arg_base = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(offset as u64, false)], - "arg_base", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get arg_base GEP: {e}")) - })? - }; - - // var_name_index(u16) at +0 - let vni_cast = self - .builder - .build_pointer_cast( - arg_base, - self.context.ptr_type(AddressSpace::default()), - "vni_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast vni ptr: {e}")))?; - self.builder - .build_store( - vni_cast, - self.context - .i16_type() - .const_int(a.var_name_index as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store vni: {e}")))?; - - // type_index(u16) at +2 - let ti_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self.context.i32_type().const_int(2, false)], - "ti_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get ti GEP: {e}")))? 
- }; - let ti_cast = self - .builder - .build_pointer_cast( - ti_ptr, - self.context.ptr_type(AddressSpace::default()), - "ti_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast ti ptr: {e}")))?; - self.builder - .build_store( - ti_cast, - self.context - .i16_type() - .const_int(a.type_index as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store ti: {e}")))?; - - // status(u8) at +5 - let apl_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self.context.i32_type().const_int(5, false)], - "status_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")) - })? - }; - self.builder - .build_store(apl_ptr, self.context.i8_type().const_int(0, false)) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; - - // access_path_len(u8) at +4 - let apl_ptr2 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self.context.i32_type().const_int(4, false)], - "apl_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get apl GEP: {e}")))? - }; - self.builder - .build_store( - apl_ptr2, - self.context - .i8_type() - .const_int(a.access_path.len() as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store apl: {e}")))?; - - // access_path bytes at +6..+6+len - for (i, b) in a.access_path.iter().enumerate() { - let byte_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self.context.i32_type().const_int((6 + i) as u64, false)], - &format!("ap_byte_{i}"), - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get ap byte GEP: {e}")) - })? 
- }; - self.builder - .build_store(byte_ptr, self.context.i8_type().const_int(*b as u64, false)) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store ap byte: {e}")) - })?; - } - - // data_len(u16) at +6+path_len (store reserved_len to keep layout consistent) - let dl_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self - .context - .i32_type() - .const_int((6 + a.access_path.len()) as u64, false)], - "dl_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get dl GEP: {e}")))? - }; - let dl_cast = self - .builder - .build_pointer_cast( - dl_ptr, - self.context.ptr_type(AddressSpace::default()), - "dl_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast dl ptr: {e}")))?; - self.builder - .build_store( - dl_cast, - self.context - .i16_type() - .const_int(reserved_len as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; - - // variable data starts at +8+path_len - let var_data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - arg_base, - &[self - .context - .i32_type() - .const_int((8 + a.access_path.len()) as u64, false)], - "var_data_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")) - })? - }; - - // No dynamic cursor; we keep a compile-time offset and use reserved_len for layout - - match &a.source { - ComplexArgSource::ImmediateBytes { bytes, .. } => { - for (i, b) in bytes.iter().enumerate() { - let byte_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[self.context.i32_type().const_int(i as u64, false)], - &format!("var_byte_{i}"), - ) - .map_err(|e| { - CodeGenError::LLVMError(format!( - "Failed to get var byte GEP: {e}" - )) - })? 
- }; - self.builder - .build_store( - byte_ptr, - self.context.i8_type().const_int(*b as u64, false), - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store var byte: {e}")) - })?; - } - // data_len already set to reserved_len - } - ComplexArgSource::MemDump { address, len } => { - // Directly probe-read into payload to avoid byte-wise copies - let ptr_ty = self.context.ptr_type(AddressSpace::default()); - let i64_ty = self.context.i64_type(); - let i32_ty = self.context.i32_type(); - - // Helper: long bpf_probe_read_user(void *dst, u32 size, const void *src) - let dst_ptr = self - .builder - .build_pointer_cast(var_data_ptr, ptr_ty, "md_dst_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let base_src_ptr = self - .builder - .build_int_to_ptr(address.value, ptr_ty, "md_src_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let offsets_found = address.offsets_found; - let not_found = self - .builder - .build_not(offsets_found, "md_offsets_miss") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let null_ptr = ptr_ty.const_null(); - let src_ptr = self - .builder - .build_select::, _>( - offsets_found, - base_src_ptr.into(), - null_ptr.into(), - "md_src_or_null", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_pointer_value(); - let len_const = i32_ty.const_int(*len as u64, false); - let zero_i32 = i32_ty.const_zero(); - let effective_len = self - .builder - .build_select::, _>( - offsets_found, - len_const.into(), - zero_i32.into(), - "md_len_or_zero", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - let ret = self - .create_bpf_helper_call( - aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user - as u64, - &[dst_ptr.into(), effective_len.into(), src_ptr.into()], - i64_ty.into(), - "probe_read_user_memdump", - )? 
- .into_int_value(); - - // Branch on ret == 0 and offsets available - let ok_pred = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - ret, - i64_ty.const_zero(), - "md_ok", - ) - .map_err(|e| CodeGenError::Builder(e.to_string()))?; - let ok = self - .builder - .build_and(ok_pred, offsets_found, "md_ok_with_offsets") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let func = self.current_function("compile memdump status branch")?; - let ok_b = self.context.append_basic_block(func, "md_ok"); - let err_b = self.context.append_basic_block(func, "md_err"); - let cont_b = self.context.append_basic_block(func, "md_cont"); - self.builder - .build_conditional_branch(ok, ok_b, err_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // ok: nothing extra to do - self.builder.position_at_end(ok_b); - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // err: either offsets missing or helper failure - self.builder.position_at_end(err_b); - let offsets_err_b = self.context.append_basic_block(func, "md_offsets_err"); - let helper_err_b = self.context.append_basic_block(func, "md_helper_err"); - self.builder - .build_conditional_branch(not_found, offsets_err_b, helper_err_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(offsets_err_b); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::OffsetsUnavailable as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(helper_err_b); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::ReadError as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // write errno + addr (12 bytes) to var_data_ptr; 
reserved sizing ensures this fits - let errno_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "errno_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let errno = self.build_errno_i32(ret, "errno_i32")?; - self.builder - .build_store(errno_ptr, errno) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let addr_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[self.context.i32_type().const_int(4, false)], - "addr_ptr_i8", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - }; - let addr_ptr = self - .builder - .build_pointer_cast( - addr_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "addr_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(addr_ptr, address.value) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(cont_b); - } - ComplexArgSource::MemDumpDynamic { - address, - len_value, - max_len: _, - } => { - // Clamp runtime read to effective reserved length for this arg - let eff_max_len = effective_reserved[arg_index] as u32; - // Read up to rlen=min(len_value, max_len) into helper buffer, then copy bytes into payload - let i32_ty = self.context.i32_type(); - let rlen_i32 = if len_value.get_type().get_bit_width() > 32 { - self.builder - .build_int_truncate(*len_value, i32_ty, "mdd_len_trunc") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if len_value.get_type().get_bit_width() < 32 { - self.builder - .build_int_z_extend(*len_value, i32_ty, "mdd_len_zext") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- } else { - *len_value - }; - // clamp negative to 0 - let zero_i32 = i32_ty.const_zero(); - let is_neg = self - .builder - .build_int_compare( - inkwell::IntPredicate::SLT, - rlen_i32, - zero_i32, - "mdd_len_neg", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let rlen_nn = self - .builder - .build_select(is_neg, zero_i32, rlen_i32, "mdd_len_nn") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - - // Bound length by the reserved space (already ensures >= 12B when possible) - let max_const = i32_ty.const_int(eff_max_len as u64, false); - let gt = self - .builder - .build_int_compare(inkwell::IntPredicate::UGT, rlen_nn, max_const, "mdd_gt") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let sel_len = self - .builder - .build_select(gt, max_const, rlen_nn, "mdd_rlen") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - - // If effective length is zero, mark status and skip read. - let func = self.current_function("compile memdump dynamic length branch")?; - let zero_b = self.context.append_basic_block(func, "mdd_len_zero"); - let read_b = self.context.append_basic_block(func, "mdd_len_read"); - let cont_b = self.context.append_basic_block(func, "mdd_cont"); - let is_zero = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - sel_len, - i32_ty.const_zero(), - "mdd_len_zero", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_conditional_branch(is_zero, zero_b, read_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Zero-length branch: set status=ZeroLength and continue. 
- self.builder.position_at_end(zero_b); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::ZeroLength as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Non-zero path: perform probe_read_user directly into var_data_ptr - self.builder.position_at_end(read_b); - let dst_ptr = self - .builder - .build_bit_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "mdd_dst_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let ptr_ty = self.context.ptr_type(AddressSpace::default()); - let base_src_ptr = self - .builder - .build_int_to_ptr(address.value, ptr_ty, "mdd_src_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let offsets_found = address.offsets_found; - let not_found = self - .builder - .build_not(offsets_found, "mdd_dyn_offsets_miss") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let null_ptr = ptr_ty.const_null(); - let src_ptr = self - .builder - .build_select::, _>( - offsets_found, - base_src_ptr.into(), - null_ptr.into(), - "mdd_src_or_null", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_pointer_value(); - let zero_i32 = self.context.i32_type().const_zero(); - let effective_len = self - .builder - .build_select::, _>( - offsets_found, - sel_len.into(), - zero_i32.into(), - "mdd_len_or_zero", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - .into_int_value(); - let ret = self - .create_bpf_helper_call( - BPF_FUNC_probe_read_user as u64, - &[dst_ptr, effective_len.into(), src_ptr.into()], - self.context.i64_type().into(), - "probe_read_user_dyn", - )? 
- .into_int_value(); - let ok_pred = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - ret, - self.context.i64_type().const_zero(), - "mdd_ok", - ) - .map_err(|e| CodeGenError::Builder(e.to_string()))?; - let ok = self - .builder - .build_and(ok_pred, offsets_found, "mdd_ok_with_offsets") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let ok_b = self.context.append_basic_block(func, "mdd_ok"); - let err_b = self.context.append_basic_block(func, "mdd_err"); - self.builder - .build_conditional_branch(ok, ok_b, err_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // ok: data already in var_data_ptr - self.builder.position_at_end(ok_b); - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // err: status+errno+addr (clamped by reserved sizing) - self.builder.position_at_end(err_b); - let offsets_err_b = self.context.append_basic_block(func, "mdd_offsets_err"); - let helper_err_b = self.context.append_basic_block(func, "mdd_helper_err"); - self.builder - .build_conditional_branch(not_found, offsets_err_b, helper_err_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(offsets_err_b); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::OffsetsUnavailable as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(helper_err_b); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::ReadError as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - if eff_max_len >= 4 { - let errno_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "mdd_errno_ptr", - ) - .map_err(|e| 
CodeGenError::LLVMError(e.to_string()))?; - let errno = self.build_errno_i32(ret, "mdd_errno_i32")?; - self.builder - .build_store(errno_ptr, errno) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - if eff_max_len as usize >= DYNAMIC_READ_ERROR_PAYLOAD_LEN { - let addr_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[self.context.i32_type().const_int(4, false)], - "mdd_addr_ptr_i8", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - }; - let addr_ptr = self - .builder - .build_pointer_cast( - addr_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "mdd_addr_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(addr_ptr, address.value) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_b) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(cont_b); - } - ComplexArgSource::ComputedInt { value, byte_len } => { - // Write computed integer into payload buffer based on requested byte_len - // Ensure the destination pointer element type matches the stored value type. - match *byte_len { - 1 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw == 1 { - // Bool: zero-extend to keep 0/1 in payload - self.builder - .build_int_z_extend( - *value, - self.context.i8_type(), - "expr_zext_bool_i8", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw < 8 { - self.builder - .build_int_s_extend( - *value, - self.context.i8_type(), - "expr_sext_i8", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 8 { - // wider than i8 -> truncate - self.builder - .build_int_truncate( - *value, - self.context.i8_type(), - "expr_trunc_i8", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- } else { - // exactly i8 - *value - }; - // var_data_ptr is i8* already; store directly - self.builder - .build_store(var_data_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 2 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw < 16 { - self.builder - .build_int_s_extend( - *value, - self.context.i16_type(), - "expr_sext_i16", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 16 { - self.builder - .build_int_truncate( - *value, - self.context.i16_type(), - "expr_trunc_i16", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - // equal width: i16 - *value - }; - let i16_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i16_ptr_ty, "expr_i16_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 4 => { - let bitw = value.get_type().get_bit_width(); - let v = if bitw < 32 { - self.builder - .build_int_s_extend( - *value, - self.context.i32_type(), - "expr_sext_i32", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else if bitw > 32 { - self.builder - .build_int_truncate( - *value, - self.context.i32_type(), - "expr_trunc_i32", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - // equal width: i32 - *value - }; - let i32_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i32_ptr_ty, "expr_i32_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - 8 => { - let v64 = if value.get_type().get_bit_width() < 64 { - self.builder - .build_int_s_extend( - *value, - self.context.i64_type(), - "expr_sext", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- } else { - *value - }; - let i64_ptr_ty = self.context.ptr_type(AddressSpace::default()); - let cast_ptr = self - .builder - .build_pointer_cast(var_data_ptr, i64_ptr_ty, "expr_i64_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, v64) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - n => { - // Fallback: write the lowest n bytes little-endian - // Truncate/extend to 64-bit, then emit byte stores - let v64 = if value.get_type().get_bit_width() < 64 { - self.builder - .build_int_z_extend( - *value, - self.context.i64_type(), - "expr_zext_fallback", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - *value - }; - for i in 0..n { - // Extract byte i - let shift = - self.context.i64_type().const_int((i * 8) as u64, false); - let shifted = self - .builder - .build_right_shift(v64, shift, false, &format!("expr_shr_{i}")) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let byte = self - .builder - .build_int_truncate( - shifted, - self.context.i8_type(), - &format!("expr_byte_{i}"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let byte_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[self.context.i32_type().const_int(i as u64, false)], - &format!("expr_byte_ptr_{i}"), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- }; - self.builder - .build_store(byte_ptr, byte) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - } - } - } - ComplexArgSource::RuntimeRead { - address, - dwarf_type, - module_for_offsets, - } => { - // Read from user memory at runtime via BPF helper - let ptr_type = self.context.ptr_type(AddressSpace::default()); - let i32_type = self.context.i32_type(); - let i64_type = self.context.i64_type(); - let dst_ptr = self - .builder - .build_bit_cast(var_data_ptr, ptr_type, "dst_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let size_val = i32_type.const_int(a.data_len as u64, false); - let src_addr = self.resolve_planned_address( - address, - Some(apl_ptr), - module_for_offsets.as_deref(), - )?; - let offsets_found = src_addr.offsets_found; - let current_fn = self.current_function("compile complex variable read")?; - let cont2_block = self.context.append_basic_block(current_fn, "after_read"); - let skip_block = self.context.append_basic_block(current_fn, "offsets_skip"); - let found_block = self.context.append_basic_block(current_fn, "offsets_found"); - self.builder - .build_conditional_branch(offsets_found, found_block, skip_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Offsets missing: record failure and continue without helper access. - self.builder.position_at_end(skip_block); - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont2_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Offsets found: proceed with null check and helper call. 
- self.builder.position_at_end(found_block); - let src_ptr = self - .builder - .build_int_to_ptr(src_addr.value, ptr_type, "src_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // status_ptr was stored in apl_ptr earlier (we named it status_ptr) - // Build NULL check - let zero64 = i64_type.const_zero(); - let is_null = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - src_addr.value, - zero64, - "is_null", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let null_block = self.context.append_basic_block(current_fn, "null_deref"); - let read_block = self.context.append_basic_block(current_fn, "read_user"); - self.builder - .build_conditional_branch(is_null, null_block, read_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // NULL path: status=1, keep reserved_len in header, no data write (buffer pre-zeroed) - self.builder.position_at_end(null_block); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::NullDeref as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont2_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Read path - self.builder.position_at_end(read_block); - let ret = self - .create_bpf_helper_call( - BPF_FUNC_probe_read_user as u64, - &[dst_ptr, size_val.into(), src_ptr.into()], - i32_type.into(), - "probe_read_user", - )? 
- .into_int_value(); - let is_err = self - .builder - .build_int_compare( - inkwell::IntPredicate::SLT, - ret, - i32_type.const_zero(), - "ret_lt_zero", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let err_block = self.context.append_basic_block(current_fn, "read_err"); - let ok_block = self.context.append_basic_block(current_fn, "read_ok"); - self.builder - .build_conditional_branch(is_err, err_block, ok_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Error branch: status=2 (read_user failed); write errno+addr payload at start; header keeps reserved_len - self.builder.position_at_end(err_block); - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::ReadError as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // write errno at [0..4] - let i32_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "errno_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast errno ptr: {e}")) - })?; - self.builder.build_store(i32_ptr, ret).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store errno: {e}")) - })?; - // write addr at [4..12] - let addr_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - var_data_ptr, - &[i32_type.const_int(4, false)], - "addr_ptr_i8", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get addr gep: {e}")) - })? 
- }; - let addr_ptr = self - .builder - .build_pointer_cast( - addr_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "addr_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast addr ptr: {e}")) - })?; - let src_as_i64 = src_addr.value; - self.builder - .build_store(addr_ptr, src_as_i64) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store addr: {e}")) - })?; - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont2_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // OK branch: success or truncated (header keeps reserved_len) - self.builder.position_at_end(ok_block); - if a.data_len < dwarf_type.size() as usize { - self.builder - .build_store( - apl_ptr, - self.context - .i8_type() - .const_int(VariableStatus::Truncated as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.mark_any_success()?; - self.mark_any_fail()?; - } else { - self.mark_any_success()?; - } - self.builder - .build_unconditional_branch(cont2_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - self.builder.position_at_end(cont2_block); - } - ComplexArgSource::AddressValue { - address, - module_for_offsets, - } => { - let addr = self.resolve_planned_address( - address, - Some(apl_ptr), - module_for_offsets.as_deref(), - )?; - let cast_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "addr_store_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(cast_ptr, addr.value) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // header already set to reserved_len (8) - } - } - // Advance compile-time offset by header_len + reserved_len - offset += 2 + 2 + 1 + 1 + a.access_path.len() + 2 + reserved_len; - } - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } - - /// Generate eBPF code for PrintStringIndex instruction - pub fn 
generate_print_string_index(&mut self, string_index: u16) -> Result<()> { - info!( - "Generating PrintStringIndex instruction: index={}", - string_index - ); - - // Allocate instruction structure on eBPF stack - // Reserve space in accumulation buffer for this instruction - let inst_buffer = self - .reserve_instruction_region_or_return_zero( - (std::mem::size_of::() - + std::mem::size_of::()) as u64, - )? - .into_value_after_runtime_returns(); - - // Clear memory with static size - let _inst_size = self.context.i64_type().const_int( - (std::mem::size_of::() - + std::mem::size_of::()) - as u64, - false, - ); - // Avoid memset on eBPF; global buffer is zero-initialized and we write fields explicitly. - - // Fill instruction header using byte offsets - // inst_type at offset 0 (first field of InstructionHeader) - let inst_type_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, inst_type) as u64, - false, - )], - "inst_type_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get inst_type GEP: {e}")))? - }; - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::PrintStringIndex as u64, false); - self.builder - .build_store(inst_type_ptr, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, data_length) as u64, - false, - )], - "data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? 
- }; - let data_length_i16_ptr = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_length_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - let data_length_val = self - .context - .i16_type() - .const_int(std::mem::size_of::() as u64, false); - self.builder - .build_store(data_length_i16_ptr, data_length_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Fill string index data (after InstructionHeader) - let string_index_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int(std::mem::size_of::() as u64, false)], - "string_index_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get string_index GEP: {e}")) - })? - }; - let string_index_i16_ptr = self - .builder - .build_pointer_cast( - string_index_ptr, - self.context.ptr_type(AddressSpace::default()), - "string_index_i16_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast string_index ptr: {e}")) - })?; - let string_index_val = self - .context - .i16_type() - .const_int(string_index as u64, false); - self.builder - .build_store(string_index_i16_ptr, string_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store string_index: {e}")))?; - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } - - /// Generate ExprError instruction with expression string index and error code/flags - pub fn generate_expr_error( - &mut self, - expr_string_index: u16, - error_code_iv: inkwell::values::IntValue<'ctx>, - flags_iv: inkwell::values::IntValue<'ctx>, - failing_addr_iv: inkwell::values::IntValue<'ctx>, - ) -> Result<()> { - // Reserve space in accumulation buffer for this instruction - let inst_buffer = self - .reserve_instruction_region_or_return_zero( - (std::mem::size_of::() - + 
std::mem::size_of::()) - as u64, - )? - .into_value_after_runtime_returns(); - - // Store instruction type at offset 0 - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::ExprError as u64, false); - self.builder - .build_store(inst_buffer, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - - // data_length - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, data_length) as u64, - false, - )], - "exprerr_data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? - }; - let data_length_i16_ptr = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "exprerr_data_length_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - let data_length_val = self.context.i16_type().const_int( - std::mem::size_of::() as u64, - false, - ); - self.builder - .build_store(data_length_i16_ptr, data_length_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Payload fields after header - // string_index at offset sizeof(InstructionHeader) + 0 (u16) - let si_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int(std::mem::size_of::() as u64, false)], - "exprerr_si_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get string_index GEP: {e}")) - })? 
- }; - let si_i16_ptr = self - .builder - .build_pointer_cast( - si_ptr, - self.context.ptr_type(AddressSpace::default()), - "exprerr_si_i16_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast string_index ptr: {e}")) - })?; - let si_val = self - .context - .i16_type() - .const_int(expr_string_index as u64, false); - self.builder - .build_store(si_i16_ptr, si_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store string_index: {e}")))?; - - // error_code at +2, flags at +3 - let ec_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int((std::mem::size_of::() + 2) as u64, false)], - "exprerr_ec_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get error_code GEP: {e}")) - })? - }; - // Truncate/extend runtime error code to i8 - let ec_i8 = if error_code_iv.get_type().get_bit_width() == 8 { - error_code_iv - } else if error_code_iv.get_type().get_bit_width() > 8 { - self.builder - .build_int_truncate(error_code_iv, self.context.i8_type(), "ec_trunc") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - self.builder - .build_int_z_extend(error_code_iv, self.context.i8_type(), "ec_zext") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - }; - self.builder - .build_store(ec_ptr, ec_i8) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store error_code: {e}")))?; - let fl_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int((std::mem::size_of::() + 3) as u64, false)], - "exprerr_flags_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get flags GEP: {e}")))? 
- }; - // Truncate/extend runtime flags to i8 - let fl_i8 = if flags_iv.get_type().get_bit_width() == 8 { - flags_iv - } else if flags_iv.get_type().get_bit_width() > 8 { - self.builder - .build_int_truncate(flags_iv, self.context.i8_type(), "fl_trunc") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - self.builder - .build_int_z_extend(flags_iv, self.context.i8_type(), "fl_zext") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - }; - self.builder - .build_store(fl_ptr, fl_i8) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store flags: {e}")))?; - - // failing_addr at +4 (u64) - let addr_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int((std::mem::size_of::() + 4) as u64, false)], - "exprerr_addr_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get addr GEP: {e}")))? - }; - let addr_i64 = if failing_addr_iv.get_type().get_bit_width() == 64 { - failing_addr_iv - } else if failing_addr_iv.get_type().get_bit_width() > 64 { - self.builder - .build_int_truncate(failing_addr_iv, self.context.i64_type(), "addr_trunc") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - self.builder - .build_int_z_extend(failing_addr_iv, self.context.i64_type(), "addr_zext") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
- }; - let addr_ptr_cast = self - .builder - .build_pointer_cast( - addr_ptr, - self.context.ptr_type(AddressSpace::default()), - "exprerr_addr_i64_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(addr_ptr_cast, addr_i64) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store failing_addr: {e}")))?; - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } - - /// Generate eBPF code for PrintVariableIndex instruction - pub fn generate_print_variable_index( - &mut self, - var_name_index: u16, - type_encoding: TypeKind, - var_name: &str, - ) -> Result<()> { - info!( - "Generating PrintVariableIndex instruction: var_name_index={}, type={:?}, var_name={}", - var_name_index, type_encoding, var_name - ); - - // Resolve type_index from DWARF if available; otherwise synthesize from TypeKind - let type_index = match self.query_dwarf_for_variable(var_name)? { - Some(var) => match var.dwarf_type { - Some(ref t) => self.trace_context.add_type(t.clone()), - None => self.add_synthesized_type_index_for_kind(type_encoding), - }, - None => { - // Variable not found via DWARF; fall back to synthesized type info based on TypeKind - self.add_synthesized_type_index_for_kind(type_encoding) - } - }; - - self.generate_successful_variable_instruction( - var_name_index, - type_encoding, - type_index, - var_name, - ) - } - - /// Generate successful variable instruction with data - fn generate_successful_variable_instruction( - &mut self, - var_name_index: u16, - type_encoding: TypeKind, - type_index: u16, - var_name: &str, - ) -> Result<()> { - // Determine data size based on type - let data_size = match type_encoding { - TypeKind::U8 | TypeKind::I8 | TypeKind::Bool | TypeKind::Char => 1, - TypeKind::U16 | TypeKind::I16 => 2, - TypeKind::U32 | TypeKind::I32 | TypeKind::F32 => 4, - TypeKind::U64 | TypeKind::I64 | TypeKind::F64 | TypeKind::Pointer => 8, - _ => 8, // Default to 8 bytes for complex types - }; - - // 
Reserve space directly in per-CPU accumulation buffer - let inst_buffer = self - .reserve_instruction_region_or_return_zero( - (std::mem::size_of::() - + std::mem::size_of::() - + data_size as usize) as u64, - )? - .into_value_after_runtime_returns(); - - // Avoid memset; global buffer is zero-initialized - - // Store instruction type at offset 0 - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::PrintVariableIndex as u64, false); - self.builder - .build_store(inst_buffer, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - - // Store data_length field of InstructionHeader - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, data_length) as u64, - false, - )], - "data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? - }; - let data_length_i16_ptr = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_length_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - let total_data_length = std::mem::size_of::() + data_size as usize; - let data_length_val = self - .context - .i16_type() - .const_int(total_data_length as u64, false); - self.builder - .build_store(data_length_i16_ptr, data_length_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Write PrintVariableIndexData after InstructionHeader - let variable_data_start = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self - .context - .i32_type() - .const_int(std::mem::size_of::() as u64, false)], - "variable_data_start", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get variable_data_start GEP: {e}")) - })? 
- }; - - // Store var_name_index using correct offset - let var_name_index_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self.context.i32_type().const_int( - std::mem::offset_of!(PrintVariableIndexData, var_name_index) as u64, - false, - )], - "var_name_index_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) - })? - }; - let var_name_index_i16_ptr = self - .builder - .build_pointer_cast( - var_name_index_ptr, - self.context.ptr_type(AddressSpace::default()), - "var_name_index_i16_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) - })?; - let var_name_index_val = self - .context - .i16_type() - .const_int(var_name_index as u64, false); - self.builder - .build_store(var_name_index_i16_ptr, var_name_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; - - // Store type_encoding using correct offset - let type_encoding_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self.context.i32_type().const_int( - std::mem::offset_of!(PrintVariableIndexData, type_encoding) as u64, - false, - )], - "type_encoding_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get type_encoding GEP: {e}")) - })? 
- }; - let type_encoding_val = self - .context - .i8_type() - .const_int(type_encoding as u8 as u64, false); - self.builder - .build_store(type_encoding_ptr, type_encoding_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_encoding: {e}")))?; - - // Store data_len using correct offset - let data_len_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self.context.i32_type().const_int( - std::mem::offset_of!(PrintVariableIndexData, data_len) as u64, - false, - )], - "data_len_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? - }; - let data_len_i16_ptr = self - .builder - .build_pointer_cast( - data_len_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_len_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; - let data_len_val = self.context.i16_type().const_int(data_size as u64, false); // Store as u16 - self.builder - .build_store(data_len_i16_ptr, data_len_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; - - // Store type_index using correct offset - let type_index_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self.context.i32_type().const_int( - std::mem::offset_of!(PrintVariableIndexData, type_index) as u64, - false, - )], - "type_index_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) - })? 
- }; - let type_index_i16_ptr = self - .builder - .build_pointer_cast( - type_index_ptr, - self.context.ptr_type(AddressSpace::default()), - "type_index_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; - let type_index_val = self.context.i16_type().const_int(type_index as u64, false); - self.builder - .build_store(type_index_i16_ptr, type_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; - - // Store status (set to 0) - let status_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self.context.i32_type().const_int( - std::mem::offset_of!(PrintVariableIndexData, status) as u64, - false, - )], - "status_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? - }; - let status_val = self - .context - .i8_type() - .const_int(VariableStatus::Ok as u64, false); - self.builder - .build_store(status_ptr, status_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; - - let var_data = self.resolve_variable_value(var_name, type_encoding, Some(status_ptr))?; - - // Store actual variable data after PrintVariableIndexData structure - let var_data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_start, - &[self - .context - .i32_type() - .const_int(std::mem::size_of::() as u64, false)], - "var_data_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")))? 
- }; - - // Store the runtime variable value based on data size - // The var_data contains the LLVM IR value (from register/memory access) - match data_size { - 1 => { - // Store as i8 - let truncated = match var_data { - BasicValueEnum::IntValue(int_val) => self - .builder - .build_int_truncate(int_val, self.context.i8_type(), "truncated_i8") - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to truncate to i8: {e}")) - })?, - _ => { - return Err(CodeGenError::LLVMError( - "Expected integer value for integer type".to_string(), - )); - } - }; - self.builder - .build_store(var_data_ptr, truncated) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store i8 data: {e}")) - })?; - } - 2 => { - // Store as i16 - let truncated = match var_data { - BasicValueEnum::IntValue(int_val) => self - .builder - .build_int_truncate(int_val, self.context.i16_type(), "truncated_i16") - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to truncate to i16: {e}")) - })?, - _ => { - return Err(CodeGenError::LLVMError( - "Expected integer value for integer type".to_string(), - )); - } - }; - let i16_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "i16_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to i16 ptr: {e}")) - })?; - self.builder.build_store(i16_ptr, truncated).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store i16 data: {e}")) - })?; - } - 4 => { - // Store as i32 or f32 - match var_data { - BasicValueEnum::IntValue(int_val) => { - let truncated = self - .builder - .build_int_truncate(int_val, self.context.i32_type(), "truncated_i32") - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to truncate to i32: {e}")) - })?; - let i32_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "i32_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to i32 ptr: {e}")) - 
})?; - self.builder.build_store(i32_ptr, truncated).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store i32 data: {e}")) - })?; - } - BasicValueEnum::FloatValue(float_val) => { - let f32_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "f32_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to f32 ptr: {e}")) - })?; - self.builder.build_store(f32_ptr, float_val).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store f32 data: {e}")) - })?; - } - _ => { - return Err(CodeGenError::LLVMError( - "Expected integer or float value for 4-byte type".to_string(), - )); - } - } - } - 8 => { - // Store as i64, f64, or pointer - match var_data { - BasicValueEnum::IntValue(int_val) => { - let i64_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "i64_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to i64 ptr: {e}")) - })?; - self.builder.build_store(i64_ptr, int_val).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store i64 data: {e}")) - })?; - } - BasicValueEnum::FloatValue(float_val) => { - let f64_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "f64_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to f64 ptr: {e}")) - })?; - self.builder.build_store(f64_ptr, float_val).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store f64 data: {e}")) - })?; - } - BasicValueEnum::PointerValue(ptr_val) => { - // Store pointer as u64 - let ptr_int = self - .builder - .build_ptr_to_int(ptr_val, self.context.i64_type(), "ptr_as_int") - .map_err(|e| { - CodeGenError::LLVMError(format!( - "Failed to convert ptr to int: {e}" - )) - })?; - let i64_ptr = self - .builder - .build_pointer_cast( - var_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "i64_ptr", - ) - 
.map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast to i64 ptr: {e}")) - })?; - self.builder.build_store(i64_ptr, ptr_int).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store pointer data: {e}")) - })?; - } - _ => { - return Err(CodeGenError::LLVMError( - "Expected integer, float, or pointer value for 8-byte type".to_string(), - )); - } - } - } - _ => { - return Err(CodeGenError::LLVMError(format!( - "Unsupported data size: {data_size}" - ))); - } - } - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } - - // PrintVariableError instruction has been removed; compile-time errors are returned as Err, - // runtime errors are carried via per-variable status in Print* instructions. - - /// Generate Backtrace instruction - pub fn generate_backtrace_instruction(&mut self, depth: u8) -> Result<()> { - info!("Generating Backtrace instruction: depth={}", depth); - - // Reserve space directly for Backtrace instruction - let inst_buffer = self - .reserve_instruction_region_or_return_zero( - (std::mem::size_of::() + std::mem::size_of::()) - as u64, - )? - .into_value_after_runtime_returns(); - - // Write InstructionHeader.inst_type - let inst_type_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, inst_type) as u64, - false, - )], - "bt_inst_type_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get inst_type GEP: {e}")))? 
- }; - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::Backtrace as u64, false); - self.builder - .build_store(inst_type_ptr, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - - // Write InstructionHeader.data_length (u16) - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int( - std::mem::offset_of!(InstructionHeader, data_length) as u64, - false, - )], - "bt_data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? - }; - let data_length_i16_ptr = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "bt_data_length_i16_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - let dl_val = self - .context - .i16_type() - .const_int(std::mem::size_of::() as u64, false); - self.builder - .build_store(data_length_i16_ptr, dl_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - - // Already accumulated; EndInstruction will send the whole event. Depth currently unused at BPF level. - Ok(()) - } - - /// Resolve variable value from script variables first, then DWARF - fn resolve_variable_value( - &mut self, - var_name: &str, - type_encoding: TypeKind, - status_ptr: Option>, - ) -> Result> { - info!( - "Resolving variable value: {} ({:?})", - var_name, type_encoding - ); - - // 1) Script variable first - if self.variable_exists(var_name) { - info!("Found script variable for '{}', loading value", var_name); - return self.load_variable(var_name); - } - - // 2) DWARF variable as fallback - match self.query_dwarf_for_variable(var_name)? 
{ - Some(var_info) => { - info!( - "Found DWARF variable read plan: {} availability={:?}", - var_name, var_info.availability - ); - - // Require DWARF type information - var_info.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError(format!( - "Variable '{var_name}' has no type information in DWARF" - )) - })?; - - let compile_context = self.get_compile_time_context()?; - self.variable_read_plan_to_llvm_value( - &var_info, - compile_context.pc_address, - status_ptr, - ) - } - None => { - let compile_context = self.get_compile_time_context()?; - warn!( - "Variable '{}' not found in DWARF at address 0x{:x}", - var_name, compile_context.pc_address - ); - Err(CodeGenError::VariableNotFound(var_name.to_string())) - } - } - } - - /// Generate PrintComplexVariable instruction and copy data at runtime using probe_read_user - fn generate_print_complex_variable_runtime( - &mut self, - meta: PrintVarRuntimeMeta, - address: &ghostscope_dwarf::PlannedAddress, - dwarf_type: &ghostscope_dwarf::TypeInfo, - module_hint: Option<&str>, - ) -> Result<()> { - tracing::trace!( - var_name_index = meta.var_name_index, - type_index = meta.type_index, - access_path = %meta.access_path, - type_size = dwarf_type.size(), - data_len_limit = meta.data_len_limit, - address = ?address, - "generate_print_complex_variable_runtime: begin" - ); - // Compute sizes first, then reserve instruction region directly in accumulation buffer - - // Compute sizes - let access_path_bytes = meta.access_path.as_bytes(); - let access_path_len = std::cmp::min(access_path_bytes.len(), 255); // u8 max - let type_size = dwarf_type.size() as usize; - let mut data_len = std::cmp::min(type_size, meta.data_len_limit); - if data_len > u16::MAX as usize { - data_len = u16::MAX as usize; - } - - let header_size = std::mem::size_of::(); - let data_struct_size = std::mem::size_of::(); - // Reserve enough space to hold either the value (read_len) or an error payload (12 bytes) - let reserved_payload = 
std::cmp::max(data_len, 12); - let total_data_length = data_struct_size + access_path_len + reserved_payload; - let total_size = header_size + total_data_length; - tracing::trace!( - header_size, - data_struct_size, - access_path_len, - data_len, - total_data_length, - total_size, - "generate_print_complex_variable_runtime: sizes computed" - ); - - // Reserve space now that sizes are known - let inst_buffer = self - .reserve_instruction_region_or_return_zero(total_size as u64)? - .into_value_after_runtime_returns(); - - // Avoid memset; reserved map value bytes are zero-initialized - - // Write InstructionHeader.inst_type at offset 0 - let inst_type_val = self - .context - .i8_type() - .const_int(InstructionType::PrintComplexVariable as u64, false); - self.builder - .build_store(inst_buffer, inst_type_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; - tracing::trace!( - "generate_print_complex_variable_runtime: wrote inst_type=PrintComplexVariable" - ); - - // Write InstructionHeader - // data_length field (u16) at offset 1 - let data_length_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int(1, false)], - "data_length_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) - })? 
- }; - let data_length_ptr_cast = self - .builder - .build_pointer_cast( - data_length_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_length_ptr_cast", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; - self.builder - .build_store( - data_length_ptr_cast, - self.context - .i16_type() - .const_int(total_data_length as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; - tracing::trace!( - data_length = total_data_length, - "generate_print_complex_variable_runtime: wrote data_length" - ); - - // Data pointer (after header) - let data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - inst_buffer, - &[self.context.i32_type().const_int(header_size as u64, false)], - "data_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data GEP: {e}")))? - }; - - // var_name_index (u16) - let var_name_index_val = self - .context - .i16_type() - .const_int(meta.var_name_index as u64, false); - // Store var_name_index at offset offsetof(PrintComplexVariableData, var_name_index) - let var_name_index_off = - std::mem::offset_of!(PrintComplexVariableData, var_name_index) as u64; - let var_name_index_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(var_name_index_off, false)], - "var_name_index_ptr_i8", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) - })? 
- }; - let var_name_index_ptr_i16 = self - .builder - .build_pointer_cast( - var_name_index_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "var_name_index_ptr_i16", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) - })?; - self.builder - .build_store(var_name_index_ptr_i16, var_name_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; - tracing::trace!( - var_name_index = meta.var_name_index, - "generate_print_complex_variable_runtime: wrote var_name_index" - ); - - // type_index (u16) right after var_name_index - // type_index at offset offsetof(PrintComplexVariableData, type_index) = 2 - let type_index_offset = std::mem::offset_of!(PrintComplexVariableData, type_index) as u64; - let type_index_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(type_index_offset, false)], - "type_index_ptr_i8", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) - })? 
- }; - let type_index_ptr = self - .builder - .build_pointer_cast( - type_index_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "type_index_ptr_i16", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; - let type_index_val = self - .context - .i16_type() - .const_int(meta.type_index as u64, false); - self.builder - .build_store(type_index_ptr, type_index_val) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; - tracing::trace!( - type_index = meta.type_index, - "generate_print_complex_variable_runtime: wrote type_index" - ); - - // access_path_len (u8) - // access_path_len at offset offsetof(..., access_path_len) - let access_path_len_off = - std::mem::offset_of!(PrintComplexVariableData, access_path_len) as u64; - let access_path_len_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self - .context - .i32_type() - .const_int(access_path_len_off, false)], - "access_path_len_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get access_path_len GEP: {e}")) - })? - }; - self.builder - .build_store( - access_path_len_ptr, - self.context - .i8_type() - .const_int(access_path_len as u64, false), - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store access_path_len: {e}")) - })?; - tracing::trace!( - access_path_len, - "generate_print_complex_variable_runtime: wrote access_path_len" - ); - - // status (u8) at offset offsetof(..., status) - let status_off = std::mem::offset_of!(PrintComplexVariableData, status) as u64; - let status_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(status_off, false)], - "status_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? 
- }; - self.builder - .build_store( - status_ptr, - self.context - .i8_type() - .const_int(VariableStatus::Ok as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; - - // (Optimized-out handling moved below after data_len pointer is available) - - // data_len (u16) - let data_len_off = std::mem::offset_of!(PrintComplexVariableData, data_len) as u64; - let data_len_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int(data_len_off, false)], - "data_len_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? - }; - let data_len_ptr_cast = self - .builder - .build_pointer_cast( - data_len_ptr, - self.context.ptr_type(AddressSpace::default()), - "data_len_ptr_i16", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; - self.builder - .build_store( - data_len_ptr_cast, - self.context.i16_type().const_int(data_len as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; - tracing::trace!( - data_len, - "generate_print_complex_variable_runtime: wrote data_len" - ); - - // Optimized-out case is handled earlier by resolving to an OptimizedOut type and ImmediateBytes path. - - // access_path bytes start after PrintComplexVariableData - let access_path_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - data_ptr, - &[self.context.i32_type().const_int( - std::mem::size_of::() as u64, - false, - )], - "access_path_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get access_path GEP: {e}")) - })? 
- }; - - // Copy access path bytes - for (i, &byte) in access_path_bytes.iter().enumerate().take(access_path_len) { - let byte_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - access_path_ptr, - &[self.context.i32_type().const_int(i as u64, false)], - &format!("access_path_byte_{i}"), - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get access_path byte GEP: {e}")) - })? - }; - let byte_val = self.context.i8_type().const_int(byte as u64, false); - self.builder.build_store(byte_ptr, byte_val).map_err(|e| { - CodeGenError::LLVMError(format!("Failed to store access_path byte: {e}")) - })?; - } - if access_path_len > 0 { - tracing::trace!("generate_print_complex_variable_runtime: wrote access_path bytes"); - } - - // Variable data starts after access_path - let variable_data_ptr = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - access_path_ptr, - &[self - .context - .i32_type() - .const_int(access_path_len as u64, false)], - "variable_data_ptr", - ) - .map_err(|e| { - CodeGenError::LLVMError(format!("Failed to get variable_data GEP: {e}")) - })? 
- }; - - // Compute source address with ASLR-aware helper, honoring module hint - // Prefer a previously recorded module path for offsets; fall back handled in helper - let src_addr = self.resolve_planned_address(address, Some(status_ptr), module_hint)?; - tracing::trace!(src_addr = %src_addr.value, "generate_print_complex_variable_runtime: computed src_addr"); - - // Setup common types and casts - let ptr_type = self.context.ptr_type(AddressSpace::default()); - let i32_type = self.context.i32_type(); - let i64_type = self.context.i64_type(); - let dst_ptr = self - .builder - .build_bit_cast(variable_data_ptr, ptr_type, "dst_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let size_val = i32_type.const_int(data_len as u64, false); - let src_ptr = self - .builder - .build_int_to_ptr(src_addr.value, ptr_type, "src_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let offsets_found = src_addr.offsets_found; - let current_fn = self.current_function("generate print complex variable runtime")?; - let cont_block = self.context.append_basic_block(current_fn, "after_read"); - let skip_block = self.context.append_basic_block(current_fn, "offsets_skip"); - let found_block = self.context.append_basic_block(current_fn, "offsets_found"); - self.builder - .build_conditional_branch(offsets_found, found_block, skip_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(skip_block); - self.mark_any_fail()?; - self.builder - .build_store(data_len_ptr_cast, self.context.i16_type().const_zero()) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_unconditional_branch(cont_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder.position_at_end(found_block); - - // Branch: NULL deref if src_addr == 0 - let zero64 = i64_type.const_zero(); - let is_null = self - .builder - .build_int_compare(inkwell::IntPredicate::EQ, src_addr.value, zero64, "is_null") - .map_err(|e| 
CodeGenError::LLVMError(e.to_string()))?; - let null_block = self.context.append_basic_block(current_fn, "null_deref"); - let read_block = self.context.append_basic_block(current_fn, "read_user"); - self.builder - .build_conditional_branch(is_null, null_block, read_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // NULL path - self.builder.position_at_end(null_block); - self.builder - .build_store( - status_ptr, - self.context - .i8_type() - .const_int(VariableStatus::NullDeref as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // data_len = 0 - self.builder - .build_store(data_len_ptr_cast, self.context.i16_type().const_zero()) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // mark fail - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Read path - self.builder.position_at_end(read_block); - let ret = self - .create_bpf_helper_call( - BPF_FUNC_probe_read_user as u64, - &[dst_ptr, size_val.into(), src_ptr.into()], - i32_type.into(), - "probe_read_user", - )? - .into_int_value(); - let is_err = self - .builder - .build_int_compare( - inkwell::IntPredicate::SLT, - ret, - i32_type.const_zero(), - "ret_lt_zero", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let err_block = self.context.append_basic_block(current_fn, "read_err"); - let ok_block = self.context.append_basic_block(current_fn, "read_ok"); - self.builder - .build_conditional_branch(is_err, err_block, ok_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Error: status=2 (read_user failed); attach errno+addr payload and set data_len=12 - self.builder.position_at_end(err_block); - // Only set ReadError if status is still Ok (preserve OffsetsUnavailable etc.) 
- let cur_status1 = self - .builder - .build_load(self.context.i8_type(), status_ptr, "cur_status1") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let is_ok1 = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - cur_status1.into_int_value(), - self.context.i8_type().const_zero(), - "status_is_ok1", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let readerr_val = self - .context - .i8_type() - .const_int(VariableStatus::ReadError as u64, false) - .into(); - let new_status1 = self - .builder - .build_select(is_ok1, readerr_val, cur_status1, "status_after_readerr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(status_ptr, new_status1) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // data_len = 12 (errno:i32 + addr:u64) - self.builder - .build_store( - data_len_ptr_cast, - self.context.i16_type().const_int(12, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // write errno at [0..4] - let errno_ptr = self - .builder - .build_pointer_cast( - variable_data_ptr, - self.context.ptr_type(AddressSpace::default()), - "errno_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast errno ptr: {e}")))?; - let errno = self.build_errno_i32(ret, "readerr_errno_i32")?; - self.builder - .build_store(errno_ptr, errno) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store errno: {e}")))?; - // write addr at [4..12] - let addr_ptr_i8 = unsafe { - self.builder - .build_gep( - self.context.i8_type(), - variable_data_ptr, - &[self.context.i32_type().const_int(4, false)], - "addr_ptr_i8", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to get addr GEP: {e}")))? 
- }; - let addr_ptr = self - .builder - .build_pointer_cast( - addr_ptr_i8, - self.context.ptr_type(AddressSpace::default()), - "addr_ptr", - ) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast addr ptr: {e}")))?; - self.builder - .build_store(addr_ptr, src_addr.value) - .map_err(|e| CodeGenError::LLVMError(format!("Failed to store addr: {e}")))?; - // mark fail - self.mark_any_fail()?; - self.builder - .build_unconditional_branch(cont_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // OK path: status=0; optional truncated if data_len_limit < dwarf_type.size() - self.builder.position_at_end(ok_block); - if data_len < dwarf_type.size() as usize { - // truncated - self.builder - .build_store( - status_ptr, - self.context - .i8_type() - .const_int(VariableStatus::Truncated as u64, false), - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - // mark both success and fail - self.mark_any_success()?; - self.mark_any_fail()?; - } else { - // success - self.mark_any_success()?; - } - self.builder - .build_unconditional_branch(cont_block) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - - // Continue - self.builder.position_at_end(cont_block); - - // Already accumulated; EndInstruction will send the whole event - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::CompileOptions; - use ghostscope_protocol::trace_event::{TraceEventHeader, TraceEventMessage}; - - #[test] - fn print_complex_format_budget_tracks_event_size() { - let bytes_reserved_so_far = - std::mem::size_of::() + std::mem::size_of::(); - let expected = 32768 - - (bytes_reserved_so_far - + std::mem::size_of::() - + std::mem::size_of::()); - assert_eq!( - print_complex_format_instruction_budget(32768, bytes_reserved_so_far), - expected - ); - assert!(print_complex_format_instruction_budget(32768, bytes_reserved_so_far) > 4096); - } - - #[test] - fn print_complex_format_budget_shrinks_after_prior_instructions() { - let bytes_reserved_so_far 
= std::mem::size_of::() - + std::mem::size_of::() - + 2048; - let base_budget = print_complex_format_instruction_budget( - 32768, - std::mem::size_of::() + std::mem::size_of::(), - ); - assert_eq!( - print_complex_format_instruction_budget(32768, bytes_reserved_so_far), - base_budget - 2048 - ); - } - - #[test] - fn dynamic_payload_reservations_share_budget_fairly() { - let reservations = allocate_dynamic_payload_reservations(&[256, 256, 256, 256], 512); - assert_eq!(reservations, vec![128, 128, 128, 128]); - } - - #[test] - fn dynamic_payload_reservations_keep_error_headroom_when_possible() { - let reservations = allocate_dynamic_payload_reservations(&[256, 256, 256], 36); - assert_eq!(reservations, vec![12, 12, 12]); - } - - #[test] - fn build_errno_i32_truncates_i64_errors() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let ctx = - EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); - let fn_type = context.i32_type().fn_type(&[], false); - let function = ctx.module.add_function("errno_test", fn_type, None); - let block = context.append_basic_block(function, "entry"); - ctx.builder.position_at_end(block); - - let errno = ctx - .build_errno_i32( - context.i64_type().const_int((-14i64) as u64, true), - "errno_i32", - ) - .expect("truncate errno"); - assert_eq!(errno.get_type().get_bit_width(), 32); - } - - #[test] - fn computed_int_store_i64_compiles() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = - EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); - // print {} with a pure script integer expression triggers ComputedInt path - let expr = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Int(41)), - op: crate::script::BinaryOp::Add, - right: Box::new(crate::script::Expr::Int(1)), - }; - let stmt = - 
crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable(expr)); - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "test_func", &[stmt], None, None, None); - assert!(res.is_ok(), "Compilation failed: {:?}", res.err()); - } - - #[test] - fn computed_int_in_format_compiles() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = - EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); - // formatted print with expression argument should also route into ComputedInt path - let expr = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Int(1)), - op: crate::script::BinaryOp::Add, - right: Box::new(crate::script::Expr::Int(2)), - }; - let stmt = crate::script::Statement::Print(crate::script::PrintStatement::Formatted { - format: "sum:{}".to_string(), - args: vec![expr], - }); - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "test_fmt", &[stmt], None, None, None); - assert!(res.is_ok(), "Compilation failed: {:?}", res.err()); - } - - #[test] - fn memcmp_rejects_script_pointer_variable_now() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = - EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); - - // let p = "A"; // script pointer to const string (no longer accepted as memcmp arg) - let decl = crate::script::Statement::VarDeclaration { - name: "p".to_string(), - value: crate::script::Expr::String("A".to_string()), - }; - - // if memcmp(p, hex("41"), 1) { print "OK"; } - let if_stmt = crate::script::Statement::If { - condition: crate::script::Expr::BuiltinCall { - name: "memcmp".to_string(), - args: vec![ - crate::script::Expr::Variable("p".to_string()), - crate::script::Expr::BuiltinCall { - name: "hex".to_string(), - args: vec![crate::script::Expr::String("41".to_string())], - 
}, - crate::script::Expr::Int(1), - ], - }, - then_body: vec![crate::script::Statement::Print( - crate::script::PrintStatement::String("OK".to_string()), - )], - else_body: None, - }; - - let program = crate::script::Program::new(); - let res = ctx.compile_program( - &program, - "test_memcmp_ptr", - &[decl, if_stmt], - None, - None, - None, - ); - assert!( - res.is_err(), - "Expected type error for script pointer variable in memcmp" - ); - } - - #[test] - fn strncmp_requires_string_on_one_side_error_message() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // strncmp(42, 43, 2) -> neither side is string (literal/var); expect type error - let stmt = crate::script::Statement::If { - condition: crate::script::Expr::BuiltinCall { - name: "strncmp".to_string(), - args: vec![ - crate::script::Expr::Int(42), - crate::script::Expr::Int(43), - crate::script::Expr::Int(2), - ], - }, - then_body: vec![crate::script::Statement::Print( - crate::script::PrintStatement::String("OK".to_string()), - )], - else_body: None, - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "test_strncmp_err", &[stmt], None, None, None); - assert!( - res.is_err(), - "expected error when neither side is string (got {res:?})", - ); - let msg = format!("{:?}", res.err()); - assert!(msg.contains("strncmp requires at least one string argument")); - } - - // No test needed here for string var copy rejection; current semantics allow - // let s = "A"; let p = s; as a string-to-string assignment. 
- - #[test] - fn immutable_variable_redeclaration_rejected() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // let x = 1; let x = 2; (same trace block) - let d1 = crate::script::Statement::VarDeclaration { - name: "x".to_string(), - value: crate::script::Expr::Int(1), - }; - let d2 = crate::script::Statement::VarDeclaration { - name: "x".to_string(), - value: crate::script::Expr::Int(2), - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "immut", &[d1, d2], None, None, None); - assert!(res.is_err(), "expected immutability error, got {res:?}"); - let msg = format!("{:?}", res.err()); - assert!( - msg.contains("Redeclaration in the same scope") || msg.contains("immutable variable"), - "unexpected error msg: {msg}" - ); - } - - #[test] - fn immutable_alias_rebinding_rejected() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // let p = &arr[0]; let p = &arr[0]; - let a1 = crate::script::Statement::AliasDeclaration { - name: "p".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( - "arr".to_string(), - ))), - }; - let a2 = crate::script::Statement::AliasDeclaration { - name: "p".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( - "arr".to_string(), - ))), - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "immut_alias", &[a1, a2], None, None, None); - assert!( - res.is_err(), - "expected immutability error for alias, got {res:?}" - ); - } - - #[test] - fn alias_to_alias_with_const_offset_is_alias_variable() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = 
EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - // let base = &buf[0]; let tail = base + 16; - let s1 = crate::script::Statement::AliasDeclaration { - name: "base".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::ArrayAccess( - Box::new(crate::script::Expr::Variable("buf".to_string())), - Box::new(crate::script::Expr::Int(0)), - ))), - }; - let s2 = crate::script::Statement::VarDeclaration { - name: "tail".to_string(), - value: crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Variable("base".to_string())), - op: crate::script::BinaryOp::Add, - right: Box::new(crate::script::Expr::Int(16)), - }, - }; - let program = crate::script::Program::new(); - // Should treat tail as alias (not as value), thus compile_program succeeds - let res = ctx.compile_program(&program, "alias_stage", &[s1, s2], None, None, None); - assert!(res.is_ok(), "expected alias-to-alias staging to compile"); - } - - #[test] - fn alias_to_alias_with_negative_const_offset_is_alias_variable() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - // let base = &buf[1]; let head = base + -1; - let base = crate::script::Statement::AliasDeclaration { - name: "base".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::ArrayAccess( - Box::new(crate::script::Expr::Variable("buf".to_string())), - Box::new(crate::script::Expr::Int(1)), - ))), - }; - let negative_one = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Int(0)), - op: crate::script::BinaryOp::Subtract, - right: Box::new(crate::script::Expr::Int(1)), - }; - let head = crate::script::Statement::VarDeclaration { - name: "head".to_string(), - value: crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Variable("base".to_string())), - op: crate::script::BinaryOp::Add, - right: 
Box::new(negative_one), - }, - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "alias_neg_stage", &[base, head], None, None, None); - assert!( - res.is_ok(), - "expected alias plus negative literal staging to compile" - ); - } - - #[test] - fn pointer_arithmetic_parts_fold_negative_literal_offsets() { - let negative_one = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Int(0)), - op: crate::script::BinaryOp::Subtract, - right: Box::new(crate::script::Expr::Int(1)), - }; - let expr = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Variable("p".to_string())), - op: crate::script::BinaryOp::Add, - right: Box::new(negative_one), - }), - op: crate::script::BinaryOp::Add, - right: Box::new(crate::script::Expr::Int(3)), - }; - let (base, index) = EbpfContext::<'static, 'static>::pointer_arithmetic_parts(&expr) - .expect("pointer arithmetic parts"); - assert!(matches!(base, crate::script::Expr::Variable(name) if name == "p")); - assert_eq!(index, 2); - } - - #[test] - fn alias_to_alias_copy_is_alias_variable() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - // let a = &G_STATE.lib; let b = a; - let a = crate::script::Statement::AliasDeclaration { - name: "a".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::MemberAccess( - Box::new(crate::script::Expr::Variable("G_STATE".to_string())), - "lib".to_string(), - ))), - }; - let b = crate::script::Statement::VarDeclaration { - name: "b".to_string(), - value: crate::script::Expr::Variable("a".to_string()), - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "alias_copy", &[a, b], None, None, None); - assert!(res.is_ok(), "expected alias-to-alias copy to compile"); - } - - #[test] - fn 
alias_self_reference_is_rejected_with_cycle_error() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // let a = &a; print a; - let a = crate::script::Statement::AliasDeclaration { - name: "a".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( - "a".to_string(), - ))), - }; - let p = crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable( - crate::script::Expr::Variable("a".to_string()), - )); - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "alias_self", &[a, p], None, None, None); - assert!(res.is_err(), "expected cycle error, got {res:?}"); - let msg = format!("{:?}", res.err()); - assert!( - msg.contains("alias cycle") || msg.contains("depth exceeded"), - "unexpected error: {msg}" - ); - } - - #[test] - fn alias_mutual_cycle_is_rejected_with_cycle_error() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // let a = &b; let b = &a; print a; - let a = crate::script::Statement::AliasDeclaration { - name: "a".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( - "b".to_string(), - ))), - }; - let b = crate::script::Statement::AliasDeclaration { - name: "b".to_string(), - target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( - "a".to_string(), - ))), - }; - let p = crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable( - crate::script::Expr::Variable("a".to_string()), - )); - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "alias_cycle", &[a, b, p], None, None, None); - assert!(res.is_err(), "expected cycle error, got {res:?}"); - let msg = format!("{:?}", res.err()); 
- assert!( - msg.contains("alias cycle") || msg.contains("depth exceeded"), - "unexpected error: {msg}" - ); - } - - #[test] - fn strncmp_folds_with_script_string_and_literal_true() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // Prepare: let s = "ABC"; - let decl = crate::script::Statement::VarDeclaration { - name: "s".to_string(), - value: crate::script::Expr::String("ABC".to_string()), - }; - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "decl", &[decl], None, None, None); - assert!(res.is_ok()); - - // Expression: strncmp(s, "ABD", 2) -> true - let expr = crate::script::Expr::BuiltinCall { - name: "strncmp".to_string(), - args: vec![ - crate::script::Expr::Variable("s".to_string()), - crate::script::Expr::String("ABD".to_string()), - crate::script::Expr::Int(2), - ], - }; - let v = ctx.compile_expr(&expr).expect("compile expr"); - match v { - inkwell::values::BasicValueEnum::IntValue(iv) => { - assert_eq!(iv.get_type().get_bit_width(), 1); - // true expected (string repr may vary across LLVM versions, check both forms) - let s = format!("{iv}"); - assert!(s.contains("i1 true") || s.contains("true")); - } - other => panic!("expected IntValue i1, got {other:?}"), - } - } - - #[test] - fn starts_with_folds_with_two_literals() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // Expression: starts_with("abcdef", "abc") -> true - let expr = crate::script::Expr::BuiltinCall { - name: "starts_with".to_string(), - args: vec![ - crate::script::Expr::String("abcdef".to_string()), - crate::script::Expr::String("abc".to_string()), - ], - }; - let v = ctx.compile_expr(&expr).expect("compile expr"); - match v { - inkwell::values::BasicValueEnum::IntValue(iv) => { - 
assert_eq!(iv.get_type().get_bit_width(), 1); - let s = format!("{iv}"); - assert!(s.contains("i1 true") || s.contains("true")); - } - _ => panic!("expected i1"), - } - } - - #[test] - fn starts_with_requires_one_string_side_error() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // Neither side is string - let expr = crate::script::Expr::BuiltinCall { - name: "starts_with".to_string(), - args: vec![crate::script::Expr::Int(1), crate::script::Expr::Int(2)], - }; - let res = ctx.compile_expr(&expr); - assert!(res.is_err(), "expected error"); - let msg = format!("{:?}", res.err()); - assert!(msg.contains("starts_with requires at least one string argument")); - } - - #[test] - fn shadowing_rejected_in_inner_scope() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // let x = 1; { let x = 2; } - let d1 = crate::script::Statement::VarDeclaration { - name: "x".to_string(), - value: crate::script::Expr::Int(1), - }; - let inner = - crate::script::Statement::Block(vec![crate::script::Statement::VarDeclaration { - name: "x".to_string(), - value: crate::script::Expr::Int(2), - }]); - let program = crate::script::Program::new(); - let res = ctx.compile_program(&program, "shadow", &[d1, inner], None, None, None); - assert!(res.is_err(), "expected shadowing error"); - let msg = format!("{:?}", res.err()); - assert!( - msg.contains("Shadowing is not allowed") || msg.contains("shadow"), - "unexpected: {msg}" - ); - } - - #[test] - fn out_of_scope_use_is_rejected() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); - - // { let y = 2; } print y; -> y is out of scope - let block = - 
crate::script::Statement::Block(vec![crate::script::Statement::VarDeclaration { - name: "y".to_string(), - value: crate::script::Expr::Int(2), - }]); - let print_y = crate::script::Statement::Print(crate::script::PrintStatement::Variable( - "y".to_string(), - )); - let program = crate::script::Program::new(); - let res = ctx.compile_program( - &program, - "out_of_scope", - &[block, print_y], - None, - None, - None, - ); - assert!( - res.is_err(), - "expected out-of-scope or missing analyzer error" - ); - } - - #[test] - fn memcmp_rejects_bare_integer_pointer_argument() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = - EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); - - // let q = 0xdeadbeef; // integer, not a pointer value - let decl = crate::script::Statement::VarDeclaration { - name: "q".to_string(), - value: crate::script::Expr::Int(0xdeadbeef), - }; - - // if memcmp(q, hex("00"), 1) { print "X"; } - let if_stmt = crate::script::Statement::If { - condition: crate::script::Expr::BuiltinCall { - name: "memcmp".to_string(), - args: vec![ - crate::script::Expr::Variable("q".to_string()), - crate::script::Expr::BuiltinCall { - name: "hex".to_string(), - args: vec![crate::script::Expr::String("00".to_string())], - }, - crate::script::Expr::Int(1), - ], - }, - then_body: vec![crate::script::Statement::Print( - crate::script::PrintStatement::String("X".to_string()), - )], - else_body: None, - }; - - let program = crate::script::Program::new(); - let res = ctx.compile_program( - &program, - "test_memcmp_int_ptr", - &[decl, if_stmt], - None, - None, - None, - ); - assert!(res.is_err(), "Expected compilation error but got Ok"); - } - - #[test] - fn expr_to_name_truncates_utf8_safely() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create ctx"); - // 
Build a long expression comprised of multibyte chars to exceed 96 chars - let mut chain: Vec = Vec::new(); - for _ in 0..50 { - // each "错误" is 6 bytes, 2 chars -> quickly exceeds 96 chars - chain.push("错误".to_string()); - } - let expr = crate::script::Expr::ChainAccess(chain); - let s = ctx.expr_to_name(&expr); - // Ensure we got a trailing ellipsis and no panic on multibyte boundary - assert!(s.ends_with("...")); - assert!(s.chars().count() <= 96); - } - - #[test] - fn pointer_int_arithmetic_is_rejected_with_friendly_error() { - let context = inkwell::context::Context::create(); - let opts = CompileOptions::default(); - let mut ctx = EbpfContext::new(&context, "ptr_arith", Some(0), &opts).expect("ctx"); - ctx.create_basic_ebpf_function("f").expect("fn"); - - // Create a script variable 'p' of pointer type (null pointer) - let ptr_ty = ctx.context.ptr_type(inkwell::AddressSpace::default()); - let null_ptr = ptr_ty.const_null(); - ctx.store_variable("p", null_ptr.into()).expect("store ptr"); - - // Expression: p + 1 - let expr = crate::script::Expr::BinaryOp { - left: Box::new(crate::script::Expr::Variable("p".to_string())), - op: crate::script::BinaryOp::Add, - right: Box::new(crate::script::Expr::Int(1)), - }; - let res = ctx.compile_expr(&expr); - assert!(res.is_err(), "expected pointer-int arithmetic error"); - let msg = format!("{:?}", res.err()); - assert!( - msg.contains("pointer and integer") - || msg.contains("Unsupported operation between pointer and integer"), - "unexpected error message: {msg}" - ); - } -} diff --git a/ghostscope-compiler/src/ebpf/codegen/args.rs b/ghostscope-compiler/src/ebpf/codegen/args.rs new file mode 100644 index 0000000..11be8e4 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/args.rs @@ -0,0 +1,898 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + pub(super) const UNKNOWN_CHAR_ARRAY_READ_FALLBACK: usize = 256; + pub(super) fn complex_arg_from_dwarf_read_plan( + &mut self, + plan: 
ghostscope_dwarf::VariableReadPlan, + display_name: Option, + ) -> Result> { + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(plan, pc_address)?; + let display_name = display_name.unwrap_or_else(|| materialized.name.clone()); + + match &materialized.materialization { + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability: ghostscope_dwarf::Availability::OptimizedOut, + } => { + let optimized_type = ghostscope_dwarf::TypeInfo::OptimizedOut { + name: materialized.name.clone(), + }; + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(optimized_type), + access_path: Vec::new(), + data_len: 0, + source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, + }) + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + availability, + pc_address, + )) + } + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let data_len = Self::compute_read_size_for_type(&dwarf_type); + if data_len == 0 { + return Err(CodeGenError::TypeSizeNotAvailable(display_name)); + } + let module_hint = + Self::module_path_for_offsets(materialized.module_path.as_deref()); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type.clone()), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::RuntimeRead { + address: address.clone(), + dwarf_type, + module_for_offsets: module_hint, + }, + }) + } + ghostscope_dwarf::VariableMaterialization::DirectValue { .. 
} => { + let value = + self.variable_materialization_to_llvm_value(&materialized, pc_address, None)?; + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let value = match value { + BasicValueEnum::IntValue(value) => value, + BasicValueEnum::PointerValue(value) => self + .builder + .build_ptr_to_int(value, self.context.i64_type(), "direct_ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string()))?, + _ => { + return Err(CodeGenError::DwarfError(format!( + "direct DWARF value '{}' did not lower to an integer", + materialized.name + ))) + } + }; + let data_len = Self::compute_read_size_for_type(&dwarf_type).clamp(1, 8); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::ComputedInt { value, byte_len: data_len }, + }) + } + ghostscope_dwarf::VariableMaterialization::Composite { .. } => Err( + CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + materialized.name + )), + ), + } + } + + /// Unified expression resolver: returns a ComplexArg carrying + /// a consistent var_name_index/type_index/access_path/data_len/source + /// with strict priority: script variables -> DWARF (locals/params/globals). 
+ pub(super) fn resolve_expr_to_arg( + &mut self, + expr: &crate::script::ast::Expr, + ) -> Result> { + use crate::script::ast::Expr as E; + match expr { + // 0) Alias variables: resolve to address and render as pointer value + E::Variable(name) if self.alias_variable_exists(name) => { + let aliased = self.get_alias_variable(name).expect("alias exists"); + let addr_i64 = self.resolve_ptr_i64_from_expr(&aliased)?; + let var_name_index = self.trace_context.add_variable_name(name.clone()); + Ok(ComplexArg { + var_name_index, + type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::ComputedInt { + value: addr_i64, + byte_len: 8, + }, + }) + } + // 1) Script variables first + E::Variable(name) if self.variable_exists(name) => { + let val = self.load_variable(name)?; + let var_name_index = self.trace_context.add_variable_name(name.clone()); + // If this is a string variable, print its contents instead of address + if self + .get_variable_type(name) + .is_some_and(|t| matches!(t, crate::script::VarType::String)) + { + let bytes_opt = self.get_string_variable_bytes(name).cloned(); + if let Some(bytes) = bytes_opt { + // Build a char[] type with length=bytes.len() + let char_type = ghostscope_dwarf::TypeInfo::BaseType { + name: "char".to_string(), + size: 1, + encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16, + }; + let array_type = ghostscope_dwarf::TypeInfo::ArrayType { + element_type: Box::new(char_type), + element_count: Some(bytes.len() as u64), + total_size: Some(bytes.len() as u64), + }; + return Ok(ComplexArg { + var_name_index, + type_index: self.trace_context.add_type(array_type), + access_path: Vec::new(), + data_len: bytes.len(), + source: ComplexArgSource::ImmediateBytes { bytes }, + }); + } + } + match val { + BasicValueEnum::IntValue(iv) => { + // Preserve signedness for display: map bit width to I8/I16/I32/I64 + let bitw = 
iv.get_type().get_bit_width(); + let (kind, byte_len) = if bitw == 1 { + (TypeKind::Bool, 1) + } else if bitw <= 8 { + (TypeKind::I8, 1) + } else if bitw <= 16 { + (TypeKind::I16, 2) + } else if bitw <= 32 { + (TypeKind::I32, 4) + } else { + (TypeKind::I64, 8) + }; + Ok(ComplexArg { + var_name_index, + type_index: self.add_synthesized_type_index_for_kind(kind), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { + value: iv, + byte_len, + }, + }) + } + BasicValueEnum::PointerValue(pv) => { + // Non-string pointer variable: print as address (hex) + let iv = self + .builder + .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string()))?; + Ok(ComplexArg { + var_name_index, + type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::ComputedInt { + value: iv, + byte_len: 8, + }, + }) + } + _ => Err(CodeGenError::TypeError( + "Unsupported script variable type for print".to_string(), + )), + } + } + + // 2) String literal -> Immediate bytes (for formatted args) + E::String(s) => { + let mut bytes = s.as_bytes().to_vec(); + bytes.push(0); + let char_type = ghostscope_dwarf::TypeInfo::BaseType { + name: "char".to_string(), + size: 1, + encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16, + }; + let array_type = ghostscope_dwarf::TypeInfo::ArrayType { + element_type: Box::new(char_type), + element_count: Some(bytes.len() as u64), + total_size: Some(bytes.len() as u64), + }; + Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name("__str_literal".to_string()), + type_index: self.trace_context.add_type(array_type), + access_path: Vec::new(), + data_len: bytes.len(), + source: ComplexArgSource::ImmediateBytes { bytes }, + }) + } + + // 3) Integer literal -> Immediate i64 bytes + E::Int(v) => { + let mut bytes = Vec::with_capacity(8); + 
bytes.extend_from_slice(&(*v).to_le_bytes()); + let int_type = ghostscope_dwarf::TypeInfo::BaseType { + name: "i64".to_string(), + size: 8, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name("__int_literal".to_string()), + type_index: self.trace_context.add_type(int_type), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::ImmediateBytes { bytes }, + }) + } + + // 4) AddressOf: return AddressValue (pointer payload will be produced) + E::AddressOf(inner) => { + let var = self + .query_dwarf_for_complex_expr(inner)? + .ok_or_else(|| CodeGenError::VariableNotFound(format!("{inner:?}")))?; + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(var, pc_address)?; + let inner_ty = materialized.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) + })?; + let ptr_ty = ghostscope_dwarf::TypeInfo::PointerType { + target_type: Box::new(inner_ty.clone()), + size: 8, + }; + let address = match materialized.materialization { + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + address + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + return Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + &availability, + pc_address, + )) + } + _ => { + return Err(CodeGenError::DwarfError(format!( + "cannot take address of value-backed DWARF expression '{}'", + materialized.name + ))) + } + }; + let module_hint = + Self::module_path_for_offsets(materialized.module_path.as_deref()); + Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.trace_context.add_type(ptr_ty), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::AddressValue { + address, + module_for_offsets: module_hint, 
+ }, + }) + } + + // 5) Complex lvalue shapes -> DWARF runtime read + expr @ (E::MemberAccess(_, _) + | E::ArrayAccess(_, _) + | E::PointerDeref(_) + | E::ChainAccess(_)) => { + if let E::ArrayAccess(array_expr, index_expr) = expr { + if let Some((BasicValueEnum::IntValue(value), _element_type)) = + self.compile_dynamic_array_access_value(array_expr, index_expr)? + { + let bitw = value.get_type().get_bit_width(); + let (kind, byte_len) = if bitw == 1 { + (TypeKind::Bool, 1) + } else if bitw <= 8 { + (TypeKind::I8, 1) + } else if bitw <= 16 { + (TypeKind::I16, 2) + } else if bitw <= 32 { + (TypeKind::I32, 4) + } else { + (TypeKind::I64, 8) + }; + return Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.add_synthesized_type_index_for_kind(kind), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { value, byte_len }, + }); + } + } + if let E::MemberAccess(obj_expr, field) = expr { + if let Some((BasicValueEnum::IntValue(value), _member_type)) = + self.compile_dynamic_member_access_value(obj_expr, field)? + { + let bitw = value.get_type().get_bit_width(); + let (kind, byte_len) = if bitw == 1 { + (TypeKind::Bool, 1) + } else if bitw <= 8 { + (TypeKind::I8, 1) + } else if bitw <= 16 { + (TypeKind::I16, 2) + } else if bitw <= 32 { + (TypeKind::I32, 4) + } else { + (TypeKind::I64, 8) + }; + return Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.add_synthesized_type_index_for_kind(kind), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { value, byte_len }, + }); + } + } + + let plan = self + .query_dwarf_for_complex_expr_plan(expr)? 
+ .ok_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))?; + let display_name = if matches!(expr, E::PointerDeref(_)) { + Some(self.expr_to_name(expr)) + } else { + None + }; + self.complex_arg_from_dwarf_read_plan(plan, display_name) + } + + // 6) Variable not in script scope → DWARF variable or computed fast-path for simple scalars + E::Variable(name) => { + if let Some(v) = self.query_dwarf_for_variable(name)? { + self.complex_arg_from_dwarf_read_plan(v, None) + } else { + Err(CodeGenError::VariableNotInScope(name.clone())) + } + } + + // 7) Pointer arithmetic (ptr +/- K) → typed runtime read at computed address + E::BinaryOp { .. } => { + // Support: ptr + int, int + ptr, ptr - int (int may be negative) + // Only allow when ptr side resolves to DWARF pointer/array; the offset must be an integer literal for now. + // We emit a RuntimeRead with computed location, preserving the pointed-to DWARF type. + let pointer_arithmetic = self.pointer_arithmetic_parts_expanding_aliases(expr)?; + + // Try DWARF resolution for the pointer side + if let Some((ptr_side, index)) = pointer_arithmetic { + if let Some(var) = self.query_dwarf_for_complex_expr(&ptr_side)? 
{ + if var + .dwarf_type + .as_ref() + .is_some_and(ghostscope_dwarf::is_c_pointer_or_array_type) + { + let pointed_plan = var + .plan_pointer_element_index(index) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self + .variable_read_plan_to_materialization(pointed_plan, pc_address)?; + let elem_ty = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let address = + match materialized.materialization { + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { + address, + } => address, + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability, + } => { + return Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + &availability, + pc_address, + )) + } + _ => return Err(CodeGenError::DwarfError( + "pointer arithmetic did not produce an address-backed plan" + .to_string(), + )), + }; + let data_len = Self::compute_read_size_for_type(&elem_ty); + let module_hint = + Self::module_path_for_offsets(materialized.module_path.as_deref()); + if data_len == 0 { + // Fallback for unsized/void targets: print computed address as pointer + let ptr_ti = ghostscope_dwarf::TypeInfo::PointerType { + target_type: Box::new(elem_ty.clone()), + size: 8, + }; + return Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.trace_context.add_type(ptr_ti), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::AddressValue { + address, + module_for_offsets: module_hint, + }, + }); + } + return Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.trace_context.add_type(elem_ty.clone()), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::RuntimeRead { + address, + dwarf_type: elem_ty, + 
module_for_offsets: module_hint, + }, + }); + } + } + } + + // If pointer side cannot be resolved as DWARF pointer/array, fall back to computed int + let compiled = self.compile_expr(expr)?; + if let BasicValueEnum::IntValue(iv) = compiled { + let bitw = iv.get_type().get_bit_width(); + let (kind, byte_len) = if bitw == 1 { + (TypeKind::Bool, 1) + } else if bitw <= 8 { + (TypeKind::I8, 1) + } else if bitw <= 16 { + (TypeKind::I16, 2) + } else if bitw <= 32 { + (TypeKind::I32, 4) + } else { + (TypeKind::I64, 8) + }; + Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.add_synthesized_type_index_for_kind(kind), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { + value: iv, + byte_len, + }, + }) + } else { + Err(CodeGenError::TypeError( + "Non-integer expression not supported in print".to_string(), + )) + } + } + + // Binary and other rvalue expressions → compile to computed int + other => { + let compiled = self.compile_expr(other)?; + if let BasicValueEnum::IntValue(iv) = compiled { + let bitw = iv.get_type().get_bit_width(); + let (kind, byte_len) = if bitw == 1 { + (TypeKind::Bool, 1) + } else if bitw <= 8 { + (TypeKind::I8, 1) + } else if bitw <= 16 { + (TypeKind::I16, 2) + } else if bitw <= 32 { + (TypeKind::I32, 4) + } else { + (TypeKind::I64, 8) + }; + Ok(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(other)), + type_index: self.add_synthesized_type_index_for_kind(kind), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { + value: iv, + byte_len, + }, + }) + } else { + Err(CodeGenError::TypeError( + "Non-integer expression not supported in print".to_string(), + )) + } + } + } + } + + /// Emit a single PrintComplexVariable or a single-arg PrintComplexFormat depending on the arg source. 
+ pub(super) fn emit_print_from_arg(&mut self, arg: ComplexArg<'ctx>) -> Result { + match arg.source { + ComplexArgSource::ComputedInt { value, byte_len } => { + self.generate_print_complex_variable_computed( + arg.var_name_index, + arg.type_index, + byte_len, + value, + )?; + Ok(1) + } + ComplexArgSource::RuntimeRead { + address, + ref dwarf_type, + module_for_offsets, + } => { + let meta = PrintVarRuntimeMeta { + var_name_index: arg.var_name_index, + type_index: arg.type_index, + access_path: String::new(), + data_len_limit: arg.data_len, + }; + self.generate_print_complex_variable_runtime( + meta, + &address, + dwarf_type, + module_for_offsets.as_deref(), + )?; + Ok(1) + } + ComplexArgSource::AddressValue { .. } | ComplexArgSource::ImmediateBytes { .. } => { + // Use ComplexFormat with "{}" to render address/immediate nicely + let fmt_idx = self.trace_context.add_string("{}".to_string()); + self.generate_print_complex_format_instruction(fmt_idx, &[arg])?; + Ok(1) + } + ComplexArgSource::MemDump { .. } | ComplexArgSource::MemDumpDynamic { .. } => { + // Use ComplexFormat with "{}"; generate_print_complex_format_instruction handles MemDump + let fmt_idx = self.trace_context.add_string("{}".to_string()); + self.generate_print_complex_format_instruction(fmt_idx, &[arg])?; + Ok(1) + } + } + } + /// Generate PrintComplexVariable instruction that embeds a computed integer value (no runtime read) + pub(super) fn is_char_byte_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { + use ghostscope_dwarf::TypeInfo as TI; + match t { + TI::BaseType { size, encoding, .. } => { + *size == 1 + && (*encoding == ghostscope_dwarf::constants::DW_ATE_unsigned_char.0 as u16 + || *encoding == ghostscope_dwarf::constants::DW_ATE_signed_char.0 as u16 + || *encoding == ghostscope_dwarf::constants::DW_ATE_unsigned.0 as u16 + || *encoding == ghostscope_dwarf::constants::DW_ATE_signed.0 as u16) + } + TI::TypedefType { + underlying_type, .. + } + | TI::QualifiedType { + underlying_type, .. 
+ } => Self::is_char_byte_typeinfo(underlying_type), + _ => false, + } + } + + /// Compute read size for a given DWARF type. + /// Keep strict behavior for general unsized arrays; only apply a bounded fallback for char[]. + pub(super) fn compute_read_size_for_type(t: &ghostscope_dwarf::TypeInfo) -> usize { + use ghostscope_dwarf::TypeInfo as TI; + match t { + TI::ArrayType { + element_type, + element_count, + total_size, + } => { + // Prefer DWARF-provided total size + if let Some(ts) = total_size { + return *ts as usize; + } + // Fallback for arrays without total_size: need element_count * elem_size + let elem_size = element_type.size() as usize; + if elem_size == 0 { + return 0; + } + if let Some(cnt) = element_count { + return elem_size * (*cnt as usize); + } + // Some toolchains emit extern/definition pairs where char[] has no bound in DWARF. + // Keep other unsized arrays strict to avoid silently over-reading unknown layouts. + if Self::is_char_byte_typeinfo(element_type) { + return Self::UNKNOWN_CHAR_ARRAY_READ_FALLBACK; + } + 0 + } + TI::TypedefType { + underlying_type, .. + } + | TI::QualifiedType { + underlying_type, .. + } => Self::compute_read_size_for_type(underlying_type), + _ => t.size() as usize, + } + } + + pub(super) fn unwrap_alias_candidate_dwarf_type( + mut t: &ghostscope_dwarf::TypeInfo, + ) -> &ghostscope_dwarf::TypeInfo { + while let ghostscope_dwarf::TypeInfo::TypedefType { + underlying_type, .. + } + | ghostscope_dwarf::TypeInfo::QualifiedType { + underlying_type, .. + } = t + { + t = underlying_type.as_ref(); + } + t + } + + pub(super) fn is_aliasable_dwarf_type(t: &ghostscope_dwarf::TypeInfo) -> bool { + matches!( + Self::unwrap_alias_candidate_dwarf_type(t), + ghostscope_dwarf::TypeInfo::PointerType { .. } + | ghostscope_dwarf::TypeInfo::ArrayType { .. } + | ghostscope_dwarf::TypeInfo::StructType { .. } + | ghostscope_dwarf::TypeInfo::UnionType { .. 
} + ) + } + + pub(super) fn expr_to_name(&self, expr: &crate::script::ast::Expr) -> String { + use crate::script::ast::Expr as E; + fn inner(e: &E) -> String { + match e { + E::Variable(s) => s.clone(), + E::MemberAccess(obj, field) => format!("{}.{field}", inner(obj)), + E::ArrayAccess(arr, idx) => format!("{}[{}]", inner(arr), inner(idx)), + E::PointerDeref(p) => format!("*{}", inner(p)), + E::AddressOf(p) => format!("&{}", inner(p)), + E::ChainAccess(v) => v.join("."), + E::Int(v) => v.to_string(), + E::String(s) => format!("\"{s}\""), + E::Float(v) => format!("{v}"), + E::UnaryNot(e1) => format!("!{}", inner(e1)), + E::Bool(v) => v.to_string(), + E::SpecialVar(s) => format!("${s}"), + E::BuiltinCall { name, args } => { + let arg_strs: Vec = args.iter().map(inner).collect(); + format!("{}({})", name, arg_strs.join(", ")) + } + E::BinaryOp { left, op, right } => { + let op_str = match op { + crate::script::ast::BinaryOp::Add => "+", + crate::script::ast::BinaryOp::Subtract => "-", + crate::script::ast::BinaryOp::Multiply => "*", + crate::script::ast::BinaryOp::Divide => "/", + crate::script::ast::BinaryOp::Equal => "==", + crate::script::ast::BinaryOp::NotEqual => "!=", + crate::script::ast::BinaryOp::LessThan => "<", + crate::script::ast::BinaryOp::LessEqual => "<=", + crate::script::ast::BinaryOp::GreaterThan => ">", + crate::script::ast::BinaryOp::GreaterEqual => ">=", + crate::script::ast::BinaryOp::LogicalAnd => "&&", + crate::script::ast::BinaryOp::LogicalOr => "||", + }; + format!("({}{}{})", inner(left), op_str, inner(right)) + } + } + } + let s_full = inner(expr); + const MAX_NAME: usize = 96; + if s_full.chars().count() > MAX_NAME { + // Keep space for ellipsis + let keep = MAX_NAME.saturating_sub(3); + let mut acc = String::with_capacity(MAX_NAME); + for (i, ch) in s_full.chars().enumerate() { + if i >= keep { + break; + } + acc.push(ch); + } + acc.push_str("..."); + acc + } else { + s_full + } + } + + pub(super) fn expr_contains_builtin(expr: 
&crate::script::ast::Expr) -> bool { + use crate::script::ast::Expr as E; + + match expr { + E::BuiltinCall { .. } => true, + E::UnaryNot(inner) + | E::PointerDeref(inner) + | E::AddressOf(inner) + | E::MemberAccess(inner, _) => Self::expr_contains_builtin(inner), + E::ArrayAccess(base, index) => { + Self::expr_contains_builtin(base) || Self::expr_contains_builtin(index) + } + E::BinaryOp { left, right, .. } => { + Self::expr_contains_builtin(left) || Self::expr_contains_builtin(right) + } + E::Int(_) + | E::Float(_) + | E::String(_) + | E::Bool(_) + | E::Variable(_) + | E::ChainAccess(_) + | E::SpecialVar(_) => false, + } + } + + pub(super) fn compile_print_expr_with_builtin_exprerror( + &mut self, + expr: &crate::script::ast::Expr, + compile: F, + ) -> Result + where + F: FnOnce(&mut Self) -> Result, + { + if !Self::expr_contains_builtin(expr) { + return compile(self); + } + + let prev_context_active = self.condition_context_active; + if prev_context_active { + return compile(self); + } + + let expr_index = self.trace_context.add_string(self.expr_to_name(expr)); + let entry_event_bytes = self.compile_time_event_bytes_upper_bound; + + self.reset_condition_error()?; + self.condition_context_active = true; + let compiled = compile(self); + self.condition_context_active = prev_context_active; + let compiled = compiled?; + + let current_function = self + .builder + .get_insert_block() + .ok_or_else(|| CodeGenError::LLVMError("No current basic block".to_string()))? 
+ .get_parent() + .ok_or_else(|| CodeGenError::LLVMError("No parent function".to_string()))?; + let err_block = self + .context + .append_basic_block(current_function, "print_expr_err_block"); + let ok_block = self + .context + .append_basic_block(current_function, "print_expr_ok_block"); + let merge_block = self + .context + .append_basic_block(current_function, "print_expr_merge_block"); + let cond_err_pred = self.build_condition_error_predicate()?; + self.builder + .build_conditional_branch(cond_err_pred, err_block, ok_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to branch on print expr error: {e}")) + })?; + + self.builder.position_at_end(err_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes; + self.emit_current_condition_exprerror(expr_index, "print_expr")?; + let err_path_event_bytes = self.compile_time_event_bytes_upper_bound; + self.builder + .build_unconditional_branch(merge_block) + .map_err(|e| { + CodeGenError::LLVMError(format!( + "Failed to branch from print expr error block: {e}" + )) + })?; + + self.builder.position_at_end(ok_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes; + self.builder + .build_unconditional_branch(merge_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to branch from print expr ok block: {e}")) + })?; + + self.builder.position_at_end(merge_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes.max(err_path_event_bytes); + Ok(compiled) + } + + pub(super) fn emit_current_condition_exprerror( + &mut self, + expr_index: u16, + name_prefix: &str, + ) -> Result<()> { + let cond_err_ptr = self.get_or_create_cond_error_global(); + let err_code = self + .builder + .build_load( + self.context.i8_type(), + cond_err_ptr, + &format!("{name_prefix}_err_code"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ .into_int_value(); + let cond_err_addr_ptr = self.get_or_create_cond_error_addr_global(); + let err_addr = self + .builder + .build_load( + self.context.i64_type(), + cond_err_addr_ptr, + &format!("{name_prefix}_err_addr"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_int_value(); + let cond_err_flags_ptr = self.get_or_create_cond_error_flags_global(); + let err_flags = self + .builder + .build_load( + self.context.i8_type(), + cond_err_flags_ptr, + &format!("{name_prefix}_err_flags"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_int_value(); + self.generate_expr_error(expr_index, err_code, err_flags, err_addr) + } + + /// Heuristic to decide if an expression should be bound as a DWARF alias variable. + /// Prefer shapes that resolve to a runtime address via DWARF or address-of: + /// - AddressOf(...) + /// - Member/Array/PointerDeref/Chain access + /// - Variable that is a DWARF-backed symbol (not a script var) + /// - Offset arithmetic on top of an aliasy expression: alias +/- integer expression + pub(super) fn is_alias_candidate_expr(&mut self, expr: &crate::script::ast::Expr) -> bool { + use crate::script::ast::BinaryOp as BO; + use crate::script::ast::Expr as E; + match expr { + // Alias variable names are alias candidates + E::Variable(name) if self.alias_variable_exists(name) => true, + // Explicit address-of is always an alias + E::AddressOf(_) => true, + // Constant offset on top of an alias-eligible expression + E::BinaryOp { + left, + op: BO::Add, + right, + } => { + let left_is_alias = self.is_alias_candidate_expr(left); + let right_is_alias = self.is_alias_candidate_expr(right); + (left_is_alias && !right_is_alias) || (right_is_alias && !left_is_alias) + } + E::BinaryOp { + left, + op: BO::Subtract, + right, + } => self.is_alias_candidate_expr(left) && !self.is_alias_candidate_expr(right), + // Otherwise, only keep address-like or aggregate DWARF expressions as aliases. 
+ // Scalar DWARF expressions should stay concrete so `let n = foo.len;` behaves + // like an integer script variable and remains usable in capture-length formatting. + other => self + .query_dwarf_for_complex_expr(other) + .ok() + .flatten() + .and_then(|var| var.dwarf_type) + .is_some_and(|ty| Self::is_aliasable_dwarf_type(&ty)), + } + } + + // removed old helpers (pure lvalue/binary_op detection) — unified resolver handles shapes +} diff --git a/ghostscope-compiler/src/ebpf/codegen/backtrace.rs b/ghostscope-compiler/src/ebpf/codegen/backtrace.rs new file mode 100644 index 0000000..09a2207 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/backtrace.rs @@ -0,0 +1,76 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + // PrintVariableError instruction has been removed; compile-time errors are returned as Err, + // runtime errors are carried via per-variable status in Print* instructions. + + /// Generate Backtrace instruction + pub fn generate_backtrace_instruction(&mut self, depth: u8) -> Result<()> { + info!("Generating Backtrace instruction: depth={}", depth); + + // Reserve space directly for Backtrace instruction + let inst_buffer = self + .reserve_instruction_region_or_return_zero( + (std::mem::size_of::() + std::mem::size_of::()) + as u64, + )? + .into_value_after_runtime_returns(); + + // Write InstructionHeader.inst_type + let inst_type_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, inst_type) as u64, + false, + )], + "bt_inst_type_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get inst_type GEP: {e}")))? 
+ }; + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::Backtrace as u64, false); + self.builder + .build_store(inst_type_ptr, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + + // Write InstructionHeader.data_length (u16) + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, data_length) as u64, + false, + )], + "bt_data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? + }; + let data_length_i16_ptr = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "bt_data_length_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + let dl_val = self + .context + .i16_type() + .const_int(std::mem::size_of::() as u64, false); + self.builder + .build_store(data_length_i16_ptr, dl_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Already accumulated; EndInstruction will send the whole event. Depth currently unused at BPF level. 
+ Ok(()) + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/expr_error.rs b/ghostscope-compiler/src/ebpf/codegen/expr_error.rs new file mode 100644 index 0000000..bce1d7b --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/expr_error.rs @@ -0,0 +1,197 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + /// Generate ExprError instruction with expression string index and error code/flags + pub fn generate_expr_error( + &mut self, + expr_string_index: u16, + error_code_iv: inkwell::values::IntValue<'ctx>, + flags_iv: inkwell::values::IntValue<'ctx>, + failing_addr_iv: inkwell::values::IntValue<'ctx>, + ) -> Result<()> { + // Reserve space in accumulation buffer for this instruction + let inst_buffer = self + .reserve_instruction_region_or_return_zero( + (std::mem::size_of::() + + std::mem::size_of::()) + as u64, + )? + .into_value_after_runtime_returns(); + + // Store instruction type at offset 0 + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::ExprError as u64, false); + self.builder + .build_store(inst_buffer, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + + // data_length + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, data_length) as u64, + false, + )], + "exprerr_data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? 
+ }; + let data_length_i16_ptr = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "exprerr_data_length_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + let data_length_val = self.context.i16_type().const_int( + std::mem::size_of::() as u64, + false, + ); + self.builder + .build_store(data_length_i16_ptr, data_length_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Payload fields after header + // string_index at offset sizeof(InstructionHeader) + 0 (u16) + let si_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int(std::mem::size_of::() as u64, false)], + "exprerr_si_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get string_index GEP: {e}")) + })? + }; + let si_i16_ptr = self + .builder + .build_pointer_cast( + si_ptr, + self.context.ptr_type(AddressSpace::default()), + "exprerr_si_i16_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast string_index ptr: {e}")) + })?; + let si_val = self + .context + .i16_type() + .const_int(expr_string_index as u64, false); + self.builder + .build_store(si_i16_ptr, si_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store string_index: {e}")))?; + + // error_code at +2, flags at +3 + let ec_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int((std::mem::size_of::() + 2) as u64, false)], + "exprerr_ec_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get error_code GEP: {e}")) + })? 
+ }; + // Truncate/extend runtime error code to i8 + let ec_i8 = if error_code_iv.get_type().get_bit_width() == 8 { + error_code_iv + } else if error_code_iv.get_type().get_bit_width() > 8 { + self.builder + .build_int_truncate(error_code_iv, self.context.i8_type(), "ec_trunc") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + self.builder + .build_int_z_extend(error_code_iv, self.context.i8_type(), "ec_zext") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + }; + self.builder + .build_store(ec_ptr, ec_i8) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store error_code: {e}")))?; + let fl_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int((std::mem::size_of::() + 3) as u64, false)], + "exprerr_flags_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get flags GEP: {e}")))? + }; + // Truncate/extend runtime flags to i8 + let fl_i8 = if flags_iv.get_type().get_bit_width() == 8 { + flags_iv + } else if flags_iv.get_type().get_bit_width() > 8 { + self.builder + .build_int_truncate(flags_iv, self.context.i8_type(), "fl_trunc") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + self.builder + .build_int_z_extend(flags_iv, self.context.i8_type(), "fl_zext") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + }; + self.builder + .build_store(fl_ptr, fl_i8) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store flags: {e}")))?; + + // failing_addr at +4 (u64) + let addr_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int((std::mem::size_of::() + 4) as u64, false)], + "exprerr_addr_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get addr GEP: {e}")))? 
+ }; + let addr_i64 = if failing_addr_iv.get_type().get_bit_width() == 64 { + failing_addr_iv + } else if failing_addr_iv.get_type().get_bit_width() > 64 { + self.builder + .build_int_truncate(failing_addr_iv, self.context.i64_type(), "addr_trunc") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + self.builder + .build_int_z_extend(failing_addr_iv, self.context.i64_type(), "addr_zext") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + }; + let addr_ptr_cast = self + .builder + .build_pointer_cast( + addr_ptr, + self.context.ptr_type(AddressSpace::default()), + "exprerr_addr_i64_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(addr_ptr_cast, addr_i64) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store failing_addr: {e}")))?; + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/format.rs b/ghostscope-compiler/src/ebpf/codegen/format.rs new file mode 100644 index 0000000..7e2e0e1 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/format.rs @@ -0,0 +1,1511 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + /// Compile formatted print statement: collect all variable data and send as PrintComplexFormat instruction + pub(super) fn resolve_memory_format_address( + &mut self, + expr: &crate::script::ast::Expr, + ) -> Result> { + if let Ok(addr) = self.resolve_runtime_address_from_expr(expr) { + return Ok(addr); + } + + let dwarf_error = match self.query_dwarf_for_complex_expr(expr) { + Ok(Some(var)) => { + let pc_address = self.get_compile_time_context()?.pc_address; + return self.variable_read_plan_to_runtime_address(&var, pc_address, None); + } + Ok(None) => None, + Err(err) => { + tracing::debug!( + error = %err, + "DWARF address resolution unavailable for memory format expression; trying script value fallback" + ); + Some(err) + } + }; + + match self.compile_expr(expr)? 
{ + BasicValueEnum::PointerValue(pv) => self + .builder + .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") + .map(|value| RuntimeAddress::available(value, self.context)) + .map_err(|e| CodeGenError::Builder(e.to_string())), + _ => { + Err(dwarf_error + .unwrap_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))) + } + } + } + + pub(super) fn compile_formatted_print( + &mut self, + format: &str, + args: &[crate::script::ast::Expr], + ) -> Result { + info!( + "Compiling formatted print: '{}' with {} arguments", + format, + args.len() + ); + let format_string_index = self.trace_context.add_string(format.to_string()); + let mut complex_args: Vec> = Vec::with_capacity(args.len()); + + // Parse placeholders from the format string to support extended specifiers + #[derive(Clone, Copy, Debug, PartialEq)] + enum Conv { + Default, + HexLower, + HexUpper, + Ptr, + Ascii, + } + #[derive(Clone, Debug, PartialEq)] + enum LenSpec { + None, + Static(usize), + Star, + Capture(String), + } + + fn parse_static_len(spec: &str) -> Option { + if spec.chars().all(|c| c.is_ascii_digit()) { + return spec.parse::().ok(); + } + if let Some(hex) = spec.strip_prefix("0x") { + if !hex.is_empty() && hex.chars().all(|c| c.is_ascii_hexdigit()) { + return usize::from_str_radix(hex, 16).ok(); + } + } + if let Some(oct) = spec.strip_prefix("0o") { + if !oct.is_empty() && oct.chars().all(|c| matches!(c, '0'..='7')) { + return usize::from_str_radix(oct, 8).ok(); + } + } + if let Some(bin) = spec.strip_prefix("0b") { + if !bin.is_empty() && bin.chars().all(|c| matches!(c, '0' | '1')) { + return usize::from_str_radix(bin, 2).ok(); + } + } + None + } + + fn parse_slots(fmt: &str) -> Vec<(Conv, LenSpec)> { + let mut res = Vec::new(); + let mut it = fmt.chars().peekable(); + while let Some(ch) = it.next() { + if ch == '{' { + if it.peek() == Some(&'{') { + it.next(); + continue; + } + let mut content = String::new(); + for c in it.by_ref() { + if c == '}' { + break; + } + 
content.push(c); + } + if content.is_empty() { + res.push((Conv::Default, LenSpec::None)); + } else if let Some(rest) = content.strip_prefix(':') { + let mut sit = rest.chars(); + let conv = match sit.next().unwrap_or(' ') { + 'x' => Conv::HexLower, + 'X' => Conv::HexUpper, + 'p' => Conv::Ptr, + 's' => Conv::Ascii, + _ => Conv::Default, + }; + let rest: String = sit.collect(); + let lens = if rest.is_empty() { + LenSpec::None + } else if let Some(r) = rest.strip_prefix('.') { + if r == "*" { + LenSpec::Star + } else if let Some(s) = r.strip_suffix('$') { + LenSpec::Capture(s.to_string()) + } else if let Some(n) = parse_static_len(r) { + LenSpec::Static(n) + } else { + LenSpec::None + } + } else { + LenSpec::None + }; + res.push((conv, lens)); + } else { + res.push((Conv::Default, LenSpec::None)); + } + } + } + res + } + + let slots = parse_slots(format); + let mut ai = 0usize; // arg cursor + for (conv, lens) in slots.into_iter() { + match conv { + Conv::Default => { + if ai >= args.len() { + break; + } + let expr = &args[ai]; + let a = self.compile_print_expr_with_builtin_exprerror(expr, |ctx| { + ctx.resolve_expr_to_arg(expr) + })?; + complex_args.push(a); + ai += 1; + } + Conv::Ptr => { + if ai >= args.len() { + break; + } + // Force pointer address payload (u64) regardless of DWARF shape + let expr = &args[ai]; + // Try compile to IntValue or PointerValue + let val = self.compile_expr(expr)?; + let iv = match val { + BasicValueEnum::IntValue(iv) => iv, + BasicValueEnum::PointerValue(pv) => self + .builder + .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string()))?, + _ => self + .compile_dwarf_expression(expr) + .and_then(|bv| match bv { + BasicValueEnum::IntValue(iv) => Ok(iv), + BasicValueEnum::PointerValue(pv) => self + .builder + .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string())), + _ => Err(CodeGenError::TypeError("pointer 
expected".into())), + })?, + }; + complex_args.push(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self.add_synthesized_type_index_for_kind(TypeKind::Pointer), + access_path: Vec::new(), + data_len: 8, + source: ComplexArgSource::ComputedInt { + value: iv, + byte_len: 8, + }, + }); + ai += 1; + } + Conv::HexLower | Conv::HexUpper | Conv::Ascii => { + // Memory dump; handle static length at compile time. Other cases use default read and let user space trim. + // Handle star: consume length arg (as computed int) then value arg + let wants_ascii = matches!(conv, Conv::Ascii); + match lens { + LenSpec::Static(n) if ai < args.len() => { + // Resolve value expr address + let expr = &args[ai]; + let addr_iv = self.resolve_memory_format_address(expr)?; + complex_args.push(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(expr)), + type_index: self + .trace_context + .add_type(ghostscope_dwarf::TypeInfo::ArrayType { + element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { + name: "u8".into(), + size: 1, + encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char + .0 + as u16, + }), + element_count: Some(n as u64), + total_size: Some(n as u64), + }), + access_path: Vec::new(), + data_len: n, + source: ComplexArgSource::MemDump { + address: addr_iv, + len: n, + }, + }); + ai += 1; + } + LenSpec::Star => { + // Dynamic length: consume length arg, then create a dynamic mem-dump for value + if ai + 1 >= args.len() { + break; + } + // length argument + let len_expr = &args[ai]; + let len_val = self.compile_expr(len_expr)?; + let (len_iv, byte_len) = match len_val { + BasicValueEnum::IntValue(iv) => (iv, 8usize), + _ => { + return Err(CodeGenError::TypeError( + "length must be integer".into(), + )) + } + }; + complex_args.push(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name("__len".into()), + type_index: 
self.add_synthesized_type_index_for_kind(TypeKind::U64), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { + value: len_iv, + byte_len, + }, + }); + + // value expression -> dynamic memdump with cap + let val_expr = &args[ai + 1]; + let addr_iv = self.resolve_memory_format_address(val_expr)?; + // Reserve up to configured per-arg cap for dynamic slices + let cap = self.compile_options.mem_dump_cap as usize; + complex_args.push(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(val_expr)), + type_index: self + .trace_context + .add_type(ghostscope_dwarf::TypeInfo::ArrayType { + element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { + name: "u8".into(), + size: 1, + encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char + .0 + as u16, + }), + element_count: Some(cap as u64), + total_size: Some(cap as u64), + }), + access_path: Vec::new(), + data_len: cap, + source: ComplexArgSource::MemDumpDynamic { + address: addr_iv, + len_value: len_iv, + max_len: cap, + }, + }); + ai += 2; + } + LenSpec::Capture(name) => { + // Use script variable `name` as length; emit a length argument + a dynamic mem-dump argument + if ai >= args.len() { + break; + } + if !self.variable_exists(&name) { + return Err(CodeGenError::TypeError(format!( + "capture length variable '{name}' not found" + ))); + } + // length as computed int + let len_val = self.load_variable(&name)?; + let (len_iv, byte_len) = match len_val { + BasicValueEnum::IntValue(iv) => (iv, 8usize), + BasicValueEnum::PointerValue(pv) => ( + self.builder + .build_ptr_to_int( + pv, + self.context.i64_type(), + "len_ptr_to_i64", + ) + .map_err(|e| CodeGenError::Builder(e.to_string()))?, + 8usize, + ), + _ => { + return Err(CodeGenError::TypeError( + "length must be integer/pointer".into(), + )) + } + }; + complex_args.push(ComplexArg { + var_name_index: self.trace_context.add_variable_name(name.clone()), + type_index: 
self.add_synthesized_type_index_for_kind(TypeKind::U64), + access_path: Vec::new(), + data_len: byte_len, + source: ComplexArgSource::ComputedInt { + value: len_iv, + byte_len, + }, + }); + + // value + let val_expr = &args[ai]; + let addr_iv = self.resolve_memory_format_address(val_expr)?; + let cap = self.compile_options.mem_dump_cap as usize; + complex_args.push(ComplexArg { + var_name_index: self + .trace_context + .add_variable_name(self.expr_to_name(val_expr)), + type_index: self + .trace_context + .add_type(ghostscope_dwarf::TypeInfo::ArrayType { + element_type: Box::new(ghostscope_dwarf::TypeInfo::BaseType { + name: "u8".into(), + size: 1, + encoding: ghostscope_dwarf::constants::DW_ATE_unsigned_char + .0 + as u16, + }), + element_count: Some(cap as u64), + total_size: Some(cap as u64), + }), + access_path: Vec::new(), + data_len: cap, + source: ComplexArgSource::MemDumpDynamic { + address: addr_iv, + len_value: len_iv, + max_len: cap, + }, + }); + ai += 1; + } + _ => { + // None: resolve value directly + if ai >= args.len() { + break; + } + complex_args.push(self.resolve_expr_to_arg(&args[ai])?); + ai += 1; + } + } + let _ = wants_ascii; // reserved for future per-arg metadata + } + } + } + self.generate_print_complex_format_instruction(format_string_index, &complex_args)?; + Ok(1) + } + /// Generate eBPF code for PrintComplexFormat instruction with runtime reads for variables + pub(super) fn generate_print_complex_format_instruction( + &mut self, + format_string_index: u16, + complex_args: &[ComplexArg<'ctx>], + ) -> Result<()> { + use InstructionType::PrintComplexFormat as IT; + + // Keep a single formatted print within the remaining event budget on the current + // control-flow path, while still leaving room for EndInstruction. 
+ let instruction_budget = print_complex_format_instruction_budget( + self.compile_options.max_trace_event_size as usize, + self.compile_time_event_bytes_upper_bound, + ); + let fixed_overhead = std::mem::size_of::() + + std::mem::size_of::(); + + // First pass: accumulate header bytes and static payload, record dynamic args + let mut arg_count = 0u8; + let mut headers_total = 0usize; + let mut static_payload_total = 0usize; + let mut dynamic_max_lens: Vec = Vec::new(); + let mut header_lens: Vec = Vec::with_capacity(complex_args.len()); + for a in complex_args { + // Header bytes per-arg: var_name_index(2) + type_index(2) + access_path_len(1) + status(1) + data_len(2) + access_path + let header_len = 2 + 2 + 1 + 1 + 2 + a.access_path.len(); + header_lens.push(header_len); + headers_total += header_len; + + match &a.source { + ComplexArgSource::ImmediateBytes { bytes } => static_payload_total += bytes.len(), + ComplexArgSource::AddressValue { .. } => static_payload_total += 8, + ComplexArgSource::RuntimeRead { .. } => { + static_payload_total += + std::cmp::max(a.data_len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) + } + ComplexArgSource::ComputedInt { byte_len, .. } => static_payload_total += *byte_len, + ComplexArgSource::MemDump { len, .. } => { + static_payload_total += std::cmp::max(*len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) + } + ComplexArgSource::MemDumpDynamic { max_len, .. } => dynamic_max_lens.push(*max_len), + } + arg_count = arg_count.saturating_add(1); + } + + // Static payload keeps its existing layout; dynamic payload shares the remaining + // instruction budget fairly so later {:s.*}/{:x.*} arguments do not get starved. 
+ let remaining_for_payload = instruction_budget + .saturating_sub(fixed_overhead) + .saturating_sub(headers_total) + .saturating_sub(static_payload_total); + let dynamic_reservations = + allocate_dynamic_payload_reservations(&dynamic_max_lens, remaining_for_payload); + let mut dynamic_reservations_iter = dynamic_reservations.into_iter(); + + // Second pass: decide effective reserved payload for each arg + // Default to computed static payload; dynamic args share the event-derived budget + let mut effective_reserved: Vec = Vec::with_capacity(complex_args.len()); + for a in complex_args { + let reserved = match &a.source { + ComplexArgSource::ImmediateBytes { bytes } => bytes.len(), + ComplexArgSource::AddressValue { .. } => 8, + ComplexArgSource::RuntimeRead { .. } => { + std::cmp::max(a.data_len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) + } + ComplexArgSource::ComputedInt { byte_len, .. } => *byte_len, + ComplexArgSource::MemDump { len, .. } => { + std::cmp::max(*len, DYNAMIC_READ_ERROR_PAYLOAD_LEN) + } + ComplexArgSource::MemDumpDynamic { .. } => { + dynamic_reservations_iter.next().unwrap_or(0) + } + }; + effective_reserved.push(reserved); + } + + // Now compute final inst_data_size using effective reservations + let total_args_payload: usize = + header_lens.iter().sum::() + effective_reserved.iter().sum::(); + let inst_data_size = std::mem::size_of::() + total_args_payload; + let total_size = std::mem::size_of::() + inst_data_size; + + // Reserve buffer directly in accumulation buffer to avoid extra copy + let buffer = self + .reserve_instruction_region_or_return_zero(total_size as u64)? 
+ .into_value_after_runtime_returns(); + + // Avoid memset; global buffer is zero-initialized + + // Write InstructionHeader + let inst_type_val = self.context.i8_type().const_int(IT as u8 as u64, false); + self.builder + .build_store(buffer, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + // data_length at +1 + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + buffer, + &[self.context.i32_type().const_int(1, false)], + "data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? + }; + let data_length_i16_ptr = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_length_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + let data_length_val = self + .context + .i16_type() + .const_int(inst_data_size as u64, false); + self.builder + .build_store(data_length_i16_ptr, data_length_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Write PrintComplexFormatData at offset 4 + let data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + buffer, + &[self.context.i32_type().const_int(4, false)], + "pcf_data_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get pcf_data_ptr GEP: {e}")) + })? 
+ }; + + // format_string_index (u16) at +0 + let fsi_ptr = self + .builder + .build_pointer_cast( + data_ptr, + self.context.ptr_type(AddressSpace::default()), + "fsi_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast fsi_ptr: {e}")))?; + let fsi_val = self + .context + .i16_type() + .const_int(format_string_index as u64, false); + self.builder + .build_store(fsi_ptr, fsi_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store fsi: {e}")))?; + // arg_count (u8) at +2 + let arg_cnt_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(2, false)], + "arg_count_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get arg_count GEP: {e}")))? + }; + self.builder + .build_store( + arg_cnt_ptr, + self.context.i8_type().const_int(arg_count as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store arg_count: {e}")))?; + + // Start of variable payload after PrintComplexFormatData — use computed effective reservations + let mut offset = std::mem::size_of::(); + for (arg_index, a) in complex_args.iter().enumerate() { + // Per-arg reserved payload length + let reserved_len = effective_reserved[arg_index]; + + // Base pointer = data_ptr + offset + let arg_base = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(offset as u64, false)], + "arg_base", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get arg_base GEP: {e}")) + })? 
+ }; + + // var_name_index(u16) at +0 + let vni_cast = self + .builder + .build_pointer_cast( + arg_base, + self.context.ptr_type(AddressSpace::default()), + "vni_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast vni ptr: {e}")))?; + self.builder + .build_store( + vni_cast, + self.context + .i16_type() + .const_int(a.var_name_index as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store vni: {e}")))?; + + // type_index(u16) at +2 + let ti_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self.context.i32_type().const_int(2, false)], + "ti_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get ti GEP: {e}")))? + }; + let ti_cast = self + .builder + .build_pointer_cast( + ti_ptr, + self.context.ptr_type(AddressSpace::default()), + "ti_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast ti ptr: {e}")))?; + self.builder + .build_store( + ti_cast, + self.context + .i16_type() + .const_int(a.type_index as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store ti: {e}")))?; + + // status(u8) at +5 + let apl_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self.context.i32_type().const_int(5, false)], + "status_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")) + })? + }; + self.builder + .build_store(apl_ptr, self.context.i8_type().const_int(0, false)) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; + + // access_path_len(u8) at +4 + let apl_ptr2 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self.context.i32_type().const_int(4, false)], + "apl_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get apl GEP: {e}")))? 
+ }; + self.builder + .build_store( + apl_ptr2, + self.context + .i8_type() + .const_int(a.access_path.len() as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store apl: {e}")))?; + + // access_path bytes at +6..+6+len + for (i, b) in a.access_path.iter().enumerate() { + let byte_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self.context.i32_type().const_int((6 + i) as u64, false)], + &format!("ap_byte_{i}"), + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get ap byte GEP: {e}")) + })? + }; + self.builder + .build_store(byte_ptr, self.context.i8_type().const_int(*b as u64, false)) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store ap byte: {e}")) + })?; + } + + // data_len(u16) at +6+path_len (store reserved_len to keep layout consistent) + let dl_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self + .context + .i32_type() + .const_int((6 + a.access_path.len()) as u64, false)], + "dl_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get dl GEP: {e}")))? + }; + let dl_cast = self + .builder + .build_pointer_cast( + dl_ptr, + self.context.ptr_type(AddressSpace::default()), + "dl_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast dl ptr: {e}")))?; + self.builder + .build_store( + dl_cast, + self.context + .i16_type() + .const_int(reserved_len as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; + + // variable data starts at +8+path_len + let var_data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + arg_base, + &[self + .context + .i32_type() + .const_int((8 + a.access_path.len()) as u64, false)], + "var_data_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")) + })? 
+ }; + + // No dynamic cursor; we keep a compile-time offset and use reserved_len for layout + + match &a.source { + ComplexArgSource::ImmediateBytes { bytes, .. } => { + for (i, b) in bytes.iter().enumerate() { + let byte_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[self.context.i32_type().const_int(i as u64, false)], + &format!("var_byte_{i}"), + ) + .map_err(|e| { + CodeGenError::LLVMError(format!( + "Failed to get var byte GEP: {e}" + )) + })? + }; + self.builder + .build_store( + byte_ptr, + self.context.i8_type().const_int(*b as u64, false), + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store var byte: {e}")) + })?; + } + // data_len already set to reserved_len + } + ComplexArgSource::MemDump { address, len } => { + // Directly probe-read into payload to avoid byte-wise copies + let ptr_ty = self.context.ptr_type(AddressSpace::default()); + let i64_ty = self.context.i64_type(); + let i32_ty = self.context.i32_type(); + + // Helper: long bpf_probe_read_user(void *dst, u32 size, const void *src) + let dst_ptr = self + .builder + .build_pointer_cast(var_data_ptr, ptr_ty, "md_dst_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let base_src_ptr = self + .builder + .build_int_to_ptr(address.value, ptr_ty, "md_src_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let offsets_found = address.offsets_found; + let not_found = self + .builder + .build_not(offsets_found, "md_offsets_miss") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let null_ptr = ptr_ty.const_null(); + let src_ptr = self + .builder + .build_select::, _>( + offsets_found, + base_src_ptr.into(), + null_ptr.into(), + "md_src_or_null", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ .into_pointer_value(); + let len_const = i32_ty.const_int(*len as u64, false); + let zero_i32 = i32_ty.const_zero(); + let effective_len = self + .builder + .build_select::, _>( + offsets_found, + len_const.into(), + zero_i32.into(), + "md_len_or_zero", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_int_value(); + let ret = self + .create_bpf_helper_call( + aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user + as u64, + &[dst_ptr.into(), effective_len.into(), src_ptr.into()], + i64_ty.into(), + "probe_read_user_memdump", + )? + .into_int_value(); + + // Branch on ret == 0 and offsets available + let ok_pred = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + ret, + i64_ty.const_zero(), + "md_ok", + ) + .map_err(|e| CodeGenError::Builder(e.to_string()))?; + let ok = self + .builder + .build_and(ok_pred, offsets_found, "md_ok_with_offsets") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let func = self.current_function("compile memdump status branch")?; + let ok_b = self.context.append_basic_block(func, "md_ok"); + let err_b = self.context.append_basic_block(func, "md_err"); + let cont_b = self.context.append_basic_block(func, "md_cont"); + self.builder + .build_conditional_branch(ok, ok_b, err_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // ok: nothing extra to do + self.builder.position_at_end(ok_b); + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // err: either offsets missing or helper failure + self.builder.position_at_end(err_b); + let offsets_err_b = self.context.append_basic_block(func, "md_offsets_err"); + let helper_err_b = self.context.append_basic_block(func, "md_helper_err"); + self.builder + .build_conditional_branch(not_found, offsets_err_b, helper_err_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(offsets_err_b); + self.builder + .build_store( + apl_ptr, + 
self.context + .i8_type() + .const_int(VariableStatus::OffsetsUnavailable as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(helper_err_b); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::ReadError as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // write errno + addr (12 bytes) to var_data_ptr; reserved sizing ensures this fits + let errno_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "errno_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let errno = self.build_errno_i32(ret, "errno_i32")?; + self.builder + .build_store(errno_ptr, errno) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let addr_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[self.context.i32_type().const_int(4, false)], + "addr_ptr_i8", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ }; + let addr_ptr = self + .builder + .build_pointer_cast( + addr_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "addr_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(addr_ptr, address.value) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(cont_b); + } + ComplexArgSource::MemDumpDynamic { + address, + len_value, + max_len: _, + } => { + // Clamp runtime read to effective reserved length for this arg + let eff_max_len = effective_reserved[arg_index] as u32; + // Read up to rlen=min(len_value, max_len) into helper buffer, then copy bytes into payload + let i32_ty = self.context.i32_type(); + let rlen_i32 = if len_value.get_type().get_bit_width() > 32 { + self.builder + .build_int_truncate(*len_value, i32_ty, "mdd_len_trunc") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if len_value.get_type().get_bit_width() < 32 { + self.builder + .build_int_z_extend(*len_value, i32_ty, "mdd_len_zext") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + *len_value + }; + // clamp negative to 0 + let zero_i32 = i32_ty.const_zero(); + let is_neg = self + .builder + .build_int_compare( + inkwell::IntPredicate::SLT, + rlen_i32, + zero_i32, + "mdd_len_neg", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let rlen_nn = self + .builder + .build_select(is_neg, zero_i32, rlen_i32, "mdd_len_nn") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ .into_int_value(); + + // Bound length by the reserved space (already ensures >= 12B when possible) + let max_const = i32_ty.const_int(eff_max_len as u64, false); + let gt = self + .builder + .build_int_compare(inkwell::IntPredicate::UGT, rlen_nn, max_const, "mdd_gt") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let sel_len = self + .builder + .build_select(gt, max_const, rlen_nn, "mdd_rlen") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_int_value(); + + // If effective length is zero, mark status and skip read. + let func = self.current_function("compile memdump dynamic length branch")?; + let zero_b = self.context.append_basic_block(func, "mdd_len_zero"); + let read_b = self.context.append_basic_block(func, "mdd_len_read"); + let cont_b = self.context.append_basic_block(func, "mdd_cont"); + let is_zero = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + sel_len, + i32_ty.const_zero(), + "mdd_len_zero", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_conditional_branch(is_zero, zero_b, read_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Zero-length branch: set status=ZeroLength and continue. 
+ self.builder.position_at_end(zero_b); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::ZeroLength as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Non-zero path: perform probe_read_user directly into var_data_ptr + self.builder.position_at_end(read_b); + let dst_ptr = self + .builder + .build_bit_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "mdd_dst_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let ptr_ty = self.context.ptr_type(AddressSpace::default()); + let base_src_ptr = self + .builder + .build_int_to_ptr(address.value, ptr_ty, "mdd_src_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let offsets_found = address.offsets_found; + let not_found = self + .builder + .build_not(offsets_found, "mdd_dyn_offsets_miss") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let null_ptr = ptr_ty.const_null(); + let src_ptr = self + .builder + .build_select::, _>( + offsets_found, + base_src_ptr.into(), + null_ptr.into(), + "mdd_src_or_null", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_pointer_value(); + let zero_i32 = self.context.i32_type().const_zero(); + let effective_len = self + .builder + .build_select::, _>( + offsets_found, + sel_len.into(), + zero_i32.into(), + "mdd_len_or_zero", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + .into_int_value(); + let ret = self + .create_bpf_helper_call( + BPF_FUNC_probe_read_user as u64, + &[dst_ptr, effective_len.into(), src_ptr.into()], + self.context.i64_type().into(), + "probe_read_user_dyn", + )? 
+ .into_int_value(); + let ok_pred = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + ret, + self.context.i64_type().const_zero(), + "mdd_ok", + ) + .map_err(|e| CodeGenError::Builder(e.to_string()))?; + let ok = self + .builder + .build_and(ok_pred, offsets_found, "mdd_ok_with_offsets") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let ok_b = self.context.append_basic_block(func, "mdd_ok"); + let err_b = self.context.append_basic_block(func, "mdd_err"); + self.builder + .build_conditional_branch(ok, ok_b, err_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // ok: data already in var_data_ptr + self.builder.position_at_end(ok_b); + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // err: status+errno+addr (clamped by reserved sizing) + self.builder.position_at_end(err_b); + let offsets_err_b = self.context.append_basic_block(func, "mdd_offsets_err"); + let helper_err_b = self.context.append_basic_block(func, "mdd_helper_err"); + self.builder + .build_conditional_branch(not_found, offsets_err_b, helper_err_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(offsets_err_b); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::OffsetsUnavailable as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(helper_err_b); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::ReadError as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + if eff_max_len >= 4 { + let errno_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "mdd_errno_ptr", + ) + .map_err(|e| 
CodeGenError::LLVMError(e.to_string()))?; + let errno = self.build_errno_i32(ret, "mdd_errno_i32")?; + self.builder + .build_store(errno_ptr, errno) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + if eff_max_len as usize >= DYNAMIC_READ_ERROR_PAYLOAD_LEN { + let addr_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[self.context.i32_type().const_int(4, false)], + "mdd_addr_ptr_i8", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + }; + let addr_ptr = self + .builder + .build_pointer_cast( + addr_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "mdd_addr_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(addr_ptr, address.value) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_b) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(cont_b); + } + ComplexArgSource::ComputedInt { value, byte_len } => { + // Write computed integer into payload buffer based on requested byte_len + // Ensure the destination pointer element type matches the stored value type. + match *byte_len { + 1 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw == 1 { + // Bool: zero-extend to keep 0/1 in payload + self.builder + .build_int_z_extend( + *value, + self.context.i8_type(), + "expr_zext_bool_i8", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw < 8 { + self.builder + .build_int_s_extend( + *value, + self.context.i8_type(), + "expr_sext_i8", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 8 { + // wider than i8 -> truncate + self.builder + .build_int_truncate( + *value, + self.context.i8_type(), + "expr_trunc_i8", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ } else { + // exactly i8 + *value + }; + // var_data_ptr is i8* already; store directly + self.builder + .build_store(var_data_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 2 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw < 16 { + self.builder + .build_int_s_extend( + *value, + self.context.i16_type(), + "expr_sext_i16", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 16 { + self.builder + .build_int_truncate( + *value, + self.context.i16_type(), + "expr_trunc_i16", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + // equal width: i16 + *value + }; + let i16_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i16_ptr_ty, "expr_i16_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 4 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw < 32 { + self.builder + .build_int_s_extend( + *value, + self.context.i32_type(), + "expr_sext_i32", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 32 { + self.builder + .build_int_truncate( + *value, + self.context.i32_type(), + "expr_trunc_i32", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + // equal width: i32 + *value + }; + let i32_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i32_ptr_ty, "expr_i32_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 8 => { + let v64 = if value.get_type().get_bit_width() < 64 { + self.builder + .build_int_s_extend( + *value, + self.context.i64_type(), + "expr_sext", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ } else { + *value + }; + let i64_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i64_ptr_ty, "expr_i64_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v64) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + n => { + // Fallback: write the lowest n bytes little-endian + // Truncate/extend to 64-bit, then emit byte stores + let v64 = if value.get_type().get_bit_width() < 64 { + self.builder + .build_int_z_extend( + *value, + self.context.i64_type(), + "expr_zext_fallback", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + *value + }; + for i in 0..n { + // Extract byte i + let shift = + self.context.i64_type().const_int((i * 8) as u64, false); + let shifted = self + .builder + .build_right_shift(v64, shift, false, &format!("expr_shr_{i}")) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let byte = self + .builder + .build_int_truncate( + shifted, + self.context.i8_type(), + &format!("expr_byte_{i}"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let byte_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[self.context.i32_type().const_int(i as u64, false)], + &format!("expr_byte_ptr_{i}"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ }; + self.builder + .build_store(byte_ptr, byte) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + } + } + } + ComplexArgSource::RuntimeRead { + address, + dwarf_type, + module_for_offsets, + } => { + // Read from user memory at runtime via BPF helper + let ptr_type = self.context.ptr_type(AddressSpace::default()); + let i32_type = self.context.i32_type(); + let i64_type = self.context.i64_type(); + let dst_ptr = self + .builder + .build_bit_cast(var_data_ptr, ptr_type, "dst_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let size_val = i32_type.const_int(a.data_len as u64, false); + let src_addr = self.resolve_planned_address( + address, + Some(apl_ptr), + module_for_offsets.as_deref(), + )?; + let offsets_found = src_addr.offsets_found; + let current_fn = self.current_function("compile complex variable read")?; + let cont2_block = self.context.append_basic_block(current_fn, "after_read"); + let skip_block = self.context.append_basic_block(current_fn, "offsets_skip"); + let found_block = self.context.append_basic_block(current_fn, "offsets_found"); + self.builder + .build_conditional_branch(offsets_found, found_block, skip_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Offsets missing: record failure and continue without helper access. + self.builder.position_at_end(skip_block); + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont2_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Offsets found: proceed with null check and helper call. 
+ self.builder.position_at_end(found_block); + let src_ptr = self + .builder + .build_int_to_ptr(src_addr.value, ptr_type, "src_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // status_ptr was stored in apl_ptr earlier (we named it status_ptr) + // Build NULL check + let zero64 = i64_type.const_zero(); + let is_null = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + src_addr.value, + zero64, + "is_null", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let null_block = self.context.append_basic_block(current_fn, "null_deref"); + let read_block = self.context.append_basic_block(current_fn, "read_user"); + self.builder + .build_conditional_branch(is_null, null_block, read_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // NULL path: status=1, keep reserved_len in header, no data write (buffer pre-zeroed) + self.builder.position_at_end(null_block); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::NullDeref as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont2_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Read path + self.builder.position_at_end(read_block); + let ret = self + .create_bpf_helper_call( + BPF_FUNC_probe_read_user as u64, + &[dst_ptr, size_val.into(), src_ptr.into()], + i32_type.into(), + "probe_read_user", + )? 
+ .into_int_value(); + let is_err = self + .builder + .build_int_compare( + inkwell::IntPredicate::SLT, + ret, + i32_type.const_zero(), + "ret_lt_zero", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let err_block = self.context.append_basic_block(current_fn, "read_err"); + let ok_block = self.context.append_basic_block(current_fn, "read_ok"); + self.builder + .build_conditional_branch(is_err, err_block, ok_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Error branch: status=2 (read_user failed); write errno+addr payload at start; header keeps reserved_len + self.builder.position_at_end(err_block); + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::ReadError as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // write errno at [0..4] + let i32_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "errno_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast errno ptr: {e}")) + })?; + self.builder.build_store(i32_ptr, ret).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store errno: {e}")) + })?; + // write addr at [4..12] + let addr_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[i32_type.const_int(4, false)], + "addr_ptr_i8", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get addr gep: {e}")) + })? 
+ }; + let addr_ptr = self + .builder + .build_pointer_cast( + addr_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "addr_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast addr ptr: {e}")) + })?; + let src_as_i64 = src_addr.value; + self.builder + .build_store(addr_ptr, src_as_i64) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store addr: {e}")) + })?; + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont2_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // OK branch: success or truncated (header keeps reserved_len) + self.builder.position_at_end(ok_block); + if a.data_len < dwarf_type.size() as usize { + self.builder + .build_store( + apl_ptr, + self.context + .i8_type() + .const_int(VariableStatus::Truncated as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.mark_any_success()?; + self.mark_any_fail()?; + } else { + self.mark_any_success()?; + } + self.builder + .build_unconditional_branch(cont2_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + self.builder.position_at_end(cont2_block); + } + ComplexArgSource::AddressValue { + address, + module_for_offsets, + } => { + let addr = self.resolve_planned_address( + address, + Some(apl_ptr), + module_for_offsets.as_deref(), + )?; + let cast_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "addr_store_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, addr.value) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // header already set to reserved_len (8) + } + } + // Advance compile-time offset by header_len + reserved_len + offset += 2 + 2 + 1 + 1 + a.access_path.len() + 2 + reserved_len; + } + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/instruction_common.rs 
b/ghostscope-compiler/src/ebpf/codegen/instruction_common.rs new file mode 100644 index 0000000..547cafa --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/instruction_common.rs @@ -0,0 +1,22 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + pub(super) fn build_errno_i32( + &self, + ret: IntValue<'ctx>, + name: &str, + ) -> Result> { + let i32_ty = self.context.i32_type(); + match ret.get_type().get_bit_width().cmp(&32) { + std::cmp::Ordering::Greater => self + .builder + .build_int_truncate(ret, i32_ty, name) + .map_err(|e| CodeGenError::LLVMError(e.to_string())), + std::cmp::Ordering::Less => self + .builder + .build_int_s_extend(ret, i32_ty, name) + .map_err(|e| CodeGenError::LLVMError(e.to_string())), + std::cmp::Ordering::Equal => Ok(ret), + } + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/mod.rs b/ghostscope-compiler/src/ebpf/codegen/mod.rs new file mode 100644 index 0000000..7ce6049 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/mod.rs @@ -0,0 +1,169 @@ +//! Code generation for instructions +//! +//! This module handles the conversion from statements to compiled instructions +//! and generates LLVM IR for individual instructions. 
+ +use super::context::{CodeGenError, EbpfContext, Result, RuntimeAddress}; +use crate::script::{PrintStatement, Program, Statement}; +use aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user; +use ghostscope_protocol::trace_event::{ + BacktraceData, EndInstructionData, InstructionHeader, PrintComplexFormatData, + PrintComplexVariableData, PrintStringIndexData, PrintVariableIndexData, VariableStatus, +}; +use ghostscope_protocol::{InstructionType, TraceContext, TypeKind}; +use inkwell::values::{BasicValueEnum, IntValue}; +use inkwell::AddressSpace; +use std::collections::HashMap; +use tracing::{debug, info, warn}; + +/// Parameters for generating a PrintComplexVariable with runtime read +#[derive(Debug, Clone)] +struct PrintVarRuntimeMeta { + var_name_index: u16, + type_index: u16, + access_path: String, + data_len_limit: usize, +} + +/// Source for complex formatted argument data +#[derive(Debug, Clone)] +enum ComplexArgSource<'ctx> { + RuntimeRead { + address: ghostscope_dwarf::PlannedAddress, + dwarf_type: ghostscope_dwarf::TypeInfo, + module_for_offsets: Option, + }, + /// Memory dump from a pointer/byte address with a static length + MemDump { + address: RuntimeAddress<'ctx>, + len: usize, + }, + /// Memory dump with dynamic runtime length; bytes read up to min(len_value, max_len) + MemDumpDynamic { + address: RuntimeAddress<'ctx>, + len_value: inkwell::values::IntValue<'ctx>, + max_len: usize, + }, + ImmediateBytes { + bytes: Vec, + }, + AddressValue { + address: ghostscope_dwarf::PlannedAddress, + module_for_offsets: Option, + }, + // Newly added: a value computed in LLVM at runtime (e.g., expression result) + ComputedInt { + value: inkwell::values::IntValue<'ctx>, + byte_len: usize, // typically 8 + }, +} + +/// Argument descriptor for PrintComplexFormat +#[derive(Debug, Clone)] +struct ComplexArg<'ctx> { + var_name_index: u16, + type_index: u16, + access_path: Vec, + data_len: usize, + source: ComplexArgSource<'ctx>, +} + +const 
/// Payload bytes reserved per dynamic read so that a failure can still be
/// reported; used below as the per-argument minimum reservation.
const DYNAMIC_READ_ERROR_PAYLOAD_LEN: usize = 12;

/// Splits `budget` units across slots without exceeding any slot's cap.
///
/// `caps[i]` is the most slot `i` may receive; slots with a cap of zero never
/// receive anything. The budget is handed out in rounds: each round offers
/// every still-unsaturated slot an equal share (`remaining / active_slots`),
/// and whatever a saturated slot could not absorb is re-offered in the next
/// round. Once the remaining budget is smaller than the number of active
/// slots, the leftover units go one each to the earliest active slots.
///
/// Returns one allocation per entry of `caps`. The allocations sum to
/// `min(budget, caps.iter().sum())`.
fn distribute_budget_fairly(caps: &[usize], budget: usize) -> Vec<usize> {
    let mut allocations = vec![0usize; caps.len()];
    // Indices of slots that can still take more, kept in their original order.
    let mut active: Vec<usize> = caps
        .iter()
        .enumerate()
        .filter_map(|(idx, cap)| (*cap > 0).then_some(idx))
        .collect();
    let mut remaining = budget;

    while remaining > 0 && !active.is_empty() {
        let share = remaining / active.len();
        if share == 0 {
            // Fewer units than claimants: one unit each, first come first served.
            for &idx in active.iter().take(remaining) {
                allocations[idx] += 1;
            }
            break;
        }

        let mut consumed = 0usize;
        // `retain` visits every index once, in order, and drops the slots that
        // just reached their cap.
        active.retain(|&idx| {
            let cap_left = caps[idx].saturating_sub(allocations[idx]);
            let take = share.min(cap_left);
            allocations[idx] += take;
            consumed += take;
            allocations[idx] < caps[idx]
        });

        if consumed == 0 {
            break;
        }
        remaining = remaining.saturating_sub(consumed);
    }

    allocations
}

/// Decides how many payload bytes to reserve for each dynamic-length argument.
///
/// `max_lens[i]` is the largest payload argument `i` could use and `available`
/// is the total number of bytes that may be handed out.
///
/// Allocation happens in two phases:
/// 1. Every argument is offered up to `DYNAMIC_READ_ERROR_PAYLOAD_LEN` bytes,
///    regardless of its `max_len`, shared fairly when `available` is too small
///    to give everyone the full amount.
/// 2. Any budget left over is shared fairly among the arguments whose
///    `max_len` exceeds what they already hold.
///
/// Returns one reservation per entry of `max_lens`; all zeros when there is
/// nothing to allocate or nothing to allocate from.
fn allocate_dynamic_payload_reservations(max_lens: &[usize], available: usize) -> Vec<usize> {
    if max_lens.is_empty() || available == 0 {
        return vec![0; max_lens.len()];
    }

    // Phase 1: the error-payload floor for every argument.
    let base_caps = vec![DYNAMIC_READ_ERROR_PAYLOAD_LEN; max_lens.len()];
    let base_budget = available.min(DYNAMIC_READ_ERROR_PAYLOAD_LEN.saturating_mul(max_lens.len()));
    let mut reservations = distribute_budget_fairly(&base_caps, base_budget);

    let remaining_budget = available.saturating_sub(reservations.iter().sum::<usize>());
    if remaining_budget == 0 {
        return reservations;
    }

    // Phase 2: top up towards each argument's real maximum. An argument whose
    // max_len is below the floor has no headroom left and gets no extra.
    let extra_caps: Vec<usize> = max_lens
        .iter()
        .zip(reservations.iter())
        .map(|(&max_len, &reserved)| {
            max_len
                .max(DYNAMIC_READ_ERROR_PAYLOAD_LEN)
                .saturating_sub(reserved)
        })
        .collect();
    let extras = distribute_budget_fairly(&extra_caps, remaining_budget);
    for (reservation, extra) in reservations.iter_mut().zip(extras) {
        *reservation += extra;
    }

    reservations
}
+ .into_value_after_runtime_returns(); + + // Write InstructionHeader.inst_type + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::PrintComplexVariable as u64, false); + self.builder + .build_store(inst_buffer, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + + // Write data_length (u16) at offset 1 + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int(1, false)], + "data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? + }; + let data_length_ptr_cast = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_length_ptr_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + self.builder + .build_store( + data_length_ptr_cast, + self.context + .i16_type() + .const_int(total_data_length as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Data pointer (after header) + let data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int(header_size as u64, false)], + "data_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data GEP: {e}")))? + }; + + // var_name_index (u16) + let var_name_index_val = self + .context + .i16_type() + .const_int(var_name_index as u64, false); + let var_name_index_off = + std::mem::offset_of!(PrintComplexVariableData, var_name_index) as u64; + let var_name_index_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(var_name_index_off, false)], + "var_name_index_ptr_i8", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) + })? 
+ }; + let var_name_index_ptr_i16 = self + .builder + .build_pointer_cast( + var_name_index_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "var_name_index_ptr_i16", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) + })?; + self.builder + .build_store(var_name_index_ptr_i16, var_name_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; + + // type_index (u16) + let type_index_offset = std::mem::offset_of!(PrintComplexVariableData, type_index) as u64; + let type_index_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(type_index_offset, false)], + "type_index_ptr_i8", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) + })? + }; + let type_index_ptr = self + .builder + .build_pointer_cast( + type_index_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "type_index_ptr_i16", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; + let type_index_val = self.context.i16_type().const_int(type_index as u64, false); + self.builder + .build_store(type_index_ptr, type_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; + + // access_path_len (u8) = 0 + let access_path_len_off = + std::mem::offset_of!(PrintComplexVariableData, access_path_len) as u64; + let access_path_len_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self + .context + .i32_type() + .const_int(access_path_len_off, false)], + "access_path_len_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get access_path_len GEP: {e}")) + })? 
+ }; + self.builder + .build_store(access_path_len_ptr, self.context.i8_type().const_zero()) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store access_path_len: {e}")) + })?; + + // status (u8) = 0 + let status_off = std::mem::offset_of!(PrintComplexVariableData, status) as u64; + let status_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(status_off, false)], + "status_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? + }; + self.builder + .build_store(status_ptr, self.context.i8_type().const_zero()) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; + + // data_len (u16) + let data_len_off = std::mem::offset_of!(PrintComplexVariableData, data_len) as u64; + let data_len_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(data_len_off, false)], + "data_len_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? + }; + let data_len_ptr_cast = self + .builder + .build_pointer_cast( + data_len_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_len_ptr_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; + self.builder + .build_store( + data_len_ptr_cast, + self.context.i16_type().const_int(byte_len as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; + + // variable data starts right after PrintComplexVariableData (no access path) + let var_data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self + .context + .i32_type() + .const_int(data_struct_size as u64, false)], + "var_data_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")))? 
+ }; + + // Store computed integer value into payload according to byte_len + match byte_len { + 1 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw == 1 { + // Booleans must serialize as 0/1 + self.builder + .build_int_z_extend(value, self.context.i8_type(), "expr_zext_bool_i8") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw < 8 { + self.builder + .build_int_s_extend(value, self.context.i8_type(), "expr_sext_i8") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 8 { + self.builder + .build_int_truncate(value, self.context.i8_type(), "expr_trunc_i8") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + value + }; + self.builder + .build_store(var_data_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 2 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw < 16 { + self.builder + .build_int_s_extend(value, self.context.i16_type(), "expr_sext_i16") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 16 { + self.builder + .build_int_truncate(value, self.context.i16_type(), "expr_trunc_i16") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + value + }; + let i16_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i16_ptr_ty, "expr_i16_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 4 => { + let bitw = value.get_type().get_bit_width(); + let v = if bitw < 32 { + self.builder + .build_int_s_extend(value, self.context.i32_type(), "expr_sext_i32") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else if bitw > 32 { + self.builder + .build_int_truncate(value, self.context.i32_type(), "expr_trunc_i32") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ } else { + value + }; + let i32_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i32_ptr_ty, "expr_i32_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + 8 => { + let v64 = if value.get_type().get_bit_width() < 64 { + self.builder + .build_int_s_extend(value, self.context.i64_type(), "expr_sext_i64") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + value + }; + let i64_ptr_ty = self.context.ptr_type(AddressSpace::default()); + let cast_ptr = self + .builder + .build_pointer_cast(var_data_ptr, i64_ptr_ty, "expr_i64_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(cast_ptr, v64) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + n => { + // Fallback: write lowest n bytes little-endian + let v64 = if value.get_type().get_bit_width() < 64 { + self.builder + .build_int_s_extend(value, self.context.i64_type(), "expr_sext_fallback") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? + } else { + value + }; + for i in 0..n { + let shift = self.context.i64_type().const_int((i * 8) as u64, false); + let shifted = self + .builder + .build_right_shift(v64, shift, false, &format!("expr_shr_{i}")) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let byte = self + .builder + .build_int_truncate( + shifted, + self.context.i8_type(), + &format!("expr_byte_{i}"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let byte_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + var_data_ptr, + &[self.context.i32_type().const_int(i as u64, false)], + &format!("expr_byte_ptr_{i}"), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))? 
+ }; + self.builder + .build_store(byte_ptr, byte) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + } + } + } + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } + /// Generate PrintComplexVariable instruction and copy data at runtime using probe_read_user + pub(super) fn generate_print_complex_variable_runtime( + &mut self, + meta: PrintVarRuntimeMeta, + address: &ghostscope_dwarf::PlannedAddress, + dwarf_type: &ghostscope_dwarf::TypeInfo, + module_hint: Option<&str>, + ) -> Result<()> { + tracing::trace!( + var_name_index = meta.var_name_index, + type_index = meta.type_index, + access_path = %meta.access_path, + type_size = dwarf_type.size(), + data_len_limit = meta.data_len_limit, + address = ?address, + "generate_print_complex_variable_runtime: begin" + ); + // Compute sizes first, then reserve instruction region directly in accumulation buffer + + // Compute sizes + let access_path_bytes = meta.access_path.as_bytes(); + let access_path_len = std::cmp::min(access_path_bytes.len(), 255); // u8 max + let type_size = dwarf_type.size() as usize; + let mut data_len = std::cmp::min(type_size, meta.data_len_limit); + if data_len > u16::MAX as usize { + data_len = u16::MAX as usize; + } + + let header_size = std::mem::size_of::(); + let data_struct_size = std::mem::size_of::(); + // Reserve enough space to hold either the value (read_len) or an error payload (12 bytes) + let reserved_payload = std::cmp::max(data_len, 12); + let total_data_length = data_struct_size + access_path_len + reserved_payload; + let total_size = header_size + total_data_length; + tracing::trace!( + header_size, + data_struct_size, + access_path_len, + data_len, + total_data_length, + total_size, + "generate_print_complex_variable_runtime: sizes computed" + ); + + // Reserve space now that sizes are known + let inst_buffer = self + .reserve_instruction_region_or_return_zero(total_size as u64)? 
+ .into_value_after_runtime_returns(); + + // Avoid memset; reserved map value bytes are zero-initialized + + // Write InstructionHeader.inst_type at offset 0 + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::PrintComplexVariable as u64, false); + self.builder + .build_store(inst_buffer, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + tracing::trace!( + "generate_print_complex_variable_runtime: wrote inst_type=PrintComplexVariable" + ); + + // Write InstructionHeader + // data_length field (u16) at offset 1 + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int(1, false)], + "data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? + }; + let data_length_ptr_cast = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_length_ptr_cast", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + self.builder + .build_store( + data_length_ptr_cast, + self.context + .i16_type() + .const_int(total_data_length as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + tracing::trace!( + data_length = total_data_length, + "generate_print_complex_variable_runtime: wrote data_length" + ); + + // Data pointer (after header) + let data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int(header_size as u64, false)], + "data_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data GEP: {e}")))? 
+ }; + + // var_name_index (u16) + let var_name_index_val = self + .context + .i16_type() + .const_int(meta.var_name_index as u64, false); + // Store var_name_index at offset offsetof(PrintComplexVariableData, var_name_index) + let var_name_index_off = + std::mem::offset_of!(PrintComplexVariableData, var_name_index) as u64; + let var_name_index_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(var_name_index_off, false)], + "var_name_index_ptr_i8", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) + })? + }; + let var_name_index_ptr_i16 = self + .builder + .build_pointer_cast( + var_name_index_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "var_name_index_ptr_i16", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) + })?; + self.builder + .build_store(var_name_index_ptr_i16, var_name_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; + tracing::trace!( + var_name_index = meta.var_name_index, + "generate_print_complex_variable_runtime: wrote var_name_index" + ); + + // type_index (u16) right after var_name_index + // type_index at offset offsetof(PrintComplexVariableData, type_index) = 2 + let type_index_offset = std::mem::offset_of!(PrintComplexVariableData, type_index) as u64; + let type_index_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(type_index_offset, false)], + "type_index_ptr_i8", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) + })? 
+ }; + let type_index_ptr = self + .builder + .build_pointer_cast( + type_index_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "type_index_ptr_i16", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; + let type_index_val = self + .context + .i16_type() + .const_int(meta.type_index as u64, false); + self.builder + .build_store(type_index_ptr, type_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; + tracing::trace!( + type_index = meta.type_index, + "generate_print_complex_variable_runtime: wrote type_index" + ); + + // access_path_len (u8) + // access_path_len at offset offsetof(..., access_path_len) + let access_path_len_off = + std::mem::offset_of!(PrintComplexVariableData, access_path_len) as u64; + let access_path_len_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self + .context + .i32_type() + .const_int(access_path_len_off, false)], + "access_path_len_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get access_path_len GEP: {e}")) + })? + }; + self.builder + .build_store( + access_path_len_ptr, + self.context + .i8_type() + .const_int(access_path_len as u64, false), + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store access_path_len: {e}")) + })?; + tracing::trace!( + access_path_len, + "generate_print_complex_variable_runtime: wrote access_path_len" + ); + + // status (u8) at offset offsetof(..., status) + let status_off = std::mem::offset_of!(PrintComplexVariableData, status) as u64; + let status_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(status_off, false)], + "status_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? 
+ }; + self.builder + .build_store( + status_ptr, + self.context + .i8_type() + .const_int(VariableStatus::Ok as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; + + // (Optimized-out handling moved below after data_len pointer is available) + + // data_len (u16) + let data_len_off = std::mem::offset_of!(PrintComplexVariableData, data_len) as u64; + let data_len_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int(data_len_off, false)], + "data_len_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? + }; + let data_len_ptr_cast = self + .builder + .build_pointer_cast( + data_len_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_len_ptr_i16", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; + self.builder + .build_store( + data_len_ptr_cast, + self.context.i16_type().const_int(data_len as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; + tracing::trace!( + data_len, + "generate_print_complex_variable_runtime: wrote data_len" + ); + + // Optimized-out case is handled earlier by resolving to an OptimizedOut type and ImmediateBytes path. + + // access_path bytes start after PrintComplexVariableData + let access_path_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + data_ptr, + &[self.context.i32_type().const_int( + std::mem::size_of::() as u64, + false, + )], + "access_path_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get access_path GEP: {e}")) + })? 
+ }; + + // Copy access path bytes + for (i, &byte) in access_path_bytes.iter().enumerate().take(access_path_len) { + let byte_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + access_path_ptr, + &[self.context.i32_type().const_int(i as u64, false)], + &format!("access_path_byte_{i}"), + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get access_path byte GEP: {e}")) + })? + }; + let byte_val = self.context.i8_type().const_int(byte as u64, false); + self.builder.build_store(byte_ptr, byte_val).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store access_path byte: {e}")) + })?; + } + if access_path_len > 0 { + tracing::trace!("generate_print_complex_variable_runtime: wrote access_path bytes"); + } + + // Variable data starts after access_path + let variable_data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + access_path_ptr, + &[self + .context + .i32_type() + .const_int(access_path_len as u64, false)], + "variable_data_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get variable_data GEP: {e}")) + })? 
+ }; + + // Compute source address with ASLR-aware helper, honoring module hint + // Prefer a previously recorded module path for offsets; fall back handled in helper + let src_addr = self.resolve_planned_address(address, Some(status_ptr), module_hint)?; + tracing::trace!(src_addr = %src_addr.value, "generate_print_complex_variable_runtime: computed src_addr"); + + // Setup common types and casts + let ptr_type = self.context.ptr_type(AddressSpace::default()); + let i32_type = self.context.i32_type(); + let i64_type = self.context.i64_type(); + let dst_ptr = self + .builder + .build_bit_cast(variable_data_ptr, ptr_type, "dst_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let size_val = i32_type.const_int(data_len as u64, false); + let src_ptr = self + .builder + .build_int_to_ptr(src_addr.value, ptr_type, "src_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let offsets_found = src_addr.offsets_found; + let current_fn = self.current_function("generate print complex variable runtime")?; + let cont_block = self.context.append_basic_block(current_fn, "after_read"); + let skip_block = self.context.append_basic_block(current_fn, "offsets_skip"); + let found_block = self.context.append_basic_block(current_fn, "offsets_found"); + self.builder + .build_conditional_branch(offsets_found, found_block, skip_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(skip_block); + self.mark_any_fail()?; + self.builder + .build_store(data_len_ptr_cast, self.context.i16_type().const_zero()) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_unconditional_branch(cont_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder.position_at_end(found_block); + + // Branch: NULL deref if src_addr == 0 + let zero64 = i64_type.const_zero(); + let is_null = self + .builder + .build_int_compare(inkwell::IntPredicate::EQ, src_addr.value, zero64, "is_null") + .map_err(|e| 
CodeGenError::LLVMError(e.to_string()))?; + let null_block = self.context.append_basic_block(current_fn, "null_deref"); + let read_block = self.context.append_basic_block(current_fn, "read_user"); + self.builder + .build_conditional_branch(is_null, null_block, read_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // NULL path + self.builder.position_at_end(null_block); + self.builder + .build_store( + status_ptr, + self.context + .i8_type() + .const_int(VariableStatus::NullDeref as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // data_len = 0 + self.builder + .build_store(data_len_ptr_cast, self.context.i16_type().const_zero()) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // mark fail + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Read path + self.builder.position_at_end(read_block); + let ret = self + .create_bpf_helper_call( + BPF_FUNC_probe_read_user as u64, + &[dst_ptr, size_val.into(), src_ptr.into()], + i32_type.into(), + "probe_read_user", + )? + .into_int_value(); + let is_err = self + .builder + .build_int_compare( + inkwell::IntPredicate::SLT, + ret, + i32_type.const_zero(), + "ret_lt_zero", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let err_block = self.context.append_basic_block(current_fn, "read_err"); + let ok_block = self.context.append_basic_block(current_fn, "read_ok"); + self.builder + .build_conditional_branch(is_err, err_block, ok_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Error: status=2 (read_user failed); attach errno+addr payload and set data_len=12 + self.builder.position_at_end(err_block); + // Only set ReadError if status is still Ok (preserve OffsetsUnavailable etc.) 
+ let cur_status1 = self + .builder + .build_load(self.context.i8_type(), status_ptr, "cur_status1") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let is_ok1 = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + cur_status1.into_int_value(), + self.context.i8_type().const_zero(), + "status_is_ok1", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let readerr_val = self + .context + .i8_type() + .const_int(VariableStatus::ReadError as u64, false) + .into(); + let new_status1 = self + .builder + .build_select(is_ok1, readerr_val, cur_status1, "status_after_readerr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(status_ptr, new_status1) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // data_len = 12 (errno:i32 + addr:u64) + self.builder + .build_store( + data_len_ptr_cast, + self.context.i16_type().const_int(12, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // write errno at [0..4] + let errno_ptr = self + .builder + .build_pointer_cast( + variable_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "errno_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast errno ptr: {e}")))?; + let errno = self.build_errno_i32(ret, "readerr_errno_i32")?; + self.builder + .build_store(errno_ptr, errno) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store errno: {e}")))?; + // write addr at [4..12] + let addr_ptr_i8 = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_ptr, + &[self.context.i32_type().const_int(4, false)], + "addr_ptr_i8", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get addr GEP: {e}")))? 
+ }; + let addr_ptr = self + .builder + .build_pointer_cast( + addr_ptr_i8, + self.context.ptr_type(AddressSpace::default()), + "addr_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast addr ptr: {e}")))?; + self.builder + .build_store(addr_ptr, src_addr.value) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store addr: {e}")))?; + // mark fail + self.mark_any_fail()?; + self.builder + .build_unconditional_branch(cont_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // OK path: status=0; optional truncated if data_len_limit < dwarf_type.size() + self.builder.position_at_end(ok_block); + if data_len < dwarf_type.size() as usize { + // truncated + self.builder + .build_store( + status_ptr, + self.context + .i8_type() + .const_int(VariableStatus::Truncated as u64, false), + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + // mark both success and fail + self.mark_any_success()?; + self.mark_any_fail()?; + } else { + // success + self.mark_any_success()?; + } + self.builder + .build_unconditional_branch(cont_block) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + + // Continue + self.builder.position_at_end(cont_block); + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/print_string_index.rs b/ghostscope-compiler/src/ebpf/codegen/print_string_index.rs new file mode 100644 index 0000000..1c737c4 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/print_string_index.rs @@ -0,0 +1,120 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + /// Generate eBPF code for PrintStringIndex instruction + pub fn generate_print_string_index(&mut self, string_index: u16) -> Result<()> { + info!( + "Generating PrintStringIndex instruction: index={}", + string_index + ); + + // Allocate instruction structure on eBPF stack + // Reserve space in accumulation buffer for this instruction + let inst_buffer = self + 
.reserve_instruction_region_or_return_zero( + (std::mem::size_of::() + + std::mem::size_of::()) as u64, + )? + .into_value_after_runtime_returns(); + + // Clear memory with static size + let _inst_size = self.context.i64_type().const_int( + (std::mem::size_of::() + + std::mem::size_of::()) + as u64, + false, + ); + // Avoid memset on eBPF; global buffer is zero-initialized and we write fields explicitly. + + // Fill instruction header using byte offsets + // inst_type at offset 0 (first field of InstructionHeader) + let inst_type_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, inst_type) as u64, + false, + )], + "inst_type_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get inst_type GEP: {e}")))? + }; + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::PrintStringIndex as u64, false); + self.builder + .build_store(inst_type_ptr, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, data_length) as u64, + false, + )], + "data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? 
+ }; + let data_length_i16_ptr = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_length_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + let data_length_val = self + .context + .i16_type() + .const_int(std::mem::size_of::() as u64, false); + self.builder + .build_store(data_length_i16_ptr, data_length_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Fill string index data (after InstructionHeader) + let string_index_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int(std::mem::size_of::() as u64, false)], + "string_index_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get string_index GEP: {e}")) + })? + }; + let string_index_i16_ptr = self + .builder + .build_pointer_cast( + string_index_ptr, + self.context.ptr_type(AddressSpace::default()), + "string_index_i16_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast string_index ptr: {e}")) + })?; + let string_index_val = self + .context + .i16_type() + .const_int(string_index as u64, false); + self.builder + .build_store(string_index_i16_ptr, string_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store string_index: {e}")))?; + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/print_variable_index.rs b/ghostscope-compiler/src/ebpf/codegen/print_variable_index.rs new file mode 100644 index 0000000..cce1bdb --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/print_variable_index.rs @@ -0,0 +1,499 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + /// Generate eBPF code for PrintVariableIndex instruction + pub fn generate_print_variable_index( + &mut self, + var_name_index: u16, + type_encoding: 
TypeKind, + var_name: &str, + ) -> Result<()> { + info!( + "Generating PrintVariableIndex instruction: var_name_index={}, type={:?}, var_name={}", + var_name_index, type_encoding, var_name + ); + + // Resolve type_index from DWARF if available; otherwise synthesize from TypeKind + let type_index = match self.query_dwarf_for_variable(var_name)? { + Some(var) => match var.dwarf_type { + Some(ref t) => self.trace_context.add_type(t.clone()), + None => self.add_synthesized_type_index_for_kind(type_encoding), + }, + None => { + // Variable not found via DWARF; fall back to synthesized type info based on TypeKind + self.add_synthesized_type_index_for_kind(type_encoding) + } + }; + + self.generate_successful_variable_instruction( + var_name_index, + type_encoding, + type_index, + var_name, + ) + } + + /// Generate successful variable instruction with data + pub(super) fn generate_successful_variable_instruction( + &mut self, + var_name_index: u16, + type_encoding: TypeKind, + type_index: u16, + var_name: &str, + ) -> Result<()> { + // Determine data size based on type + let data_size = match type_encoding { + TypeKind::U8 | TypeKind::I8 | TypeKind::Bool | TypeKind::Char => 1, + TypeKind::U16 | TypeKind::I16 => 2, + TypeKind::U32 | TypeKind::I32 | TypeKind::F32 => 4, + TypeKind::U64 | TypeKind::I64 | TypeKind::F64 | TypeKind::Pointer => 8, + _ => 8, // Default to 8 bytes for complex types + }; + + // Reserve space directly in per-CPU accumulation buffer + let inst_buffer = self + .reserve_instruction_region_or_return_zero( + (std::mem::size_of::() + + std::mem::size_of::() + + data_size as usize) as u64, + )? 
+ .into_value_after_runtime_returns(); + + // Avoid memset; global buffer is zero-initialized + + // Store instruction type at offset 0 + let inst_type_val = self + .context + .i8_type() + .const_int(InstructionType::PrintVariableIndex as u64, false); + self.builder + .build_store(inst_buffer, inst_type_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store inst_type: {e}")))?; + + // Store data_length field of InstructionHeader + let data_length_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self.context.i32_type().const_int( + std::mem::offset_of!(InstructionHeader, data_length) as u64, + false, + )], + "data_length_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get data_length GEP: {e}")) + })? + }; + let data_length_i16_ptr = self + .builder + .build_pointer_cast( + data_length_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_length_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_length ptr: {e}")))?; + let total_data_length = std::mem::size_of::() + data_size as usize; + let data_length_val = self + .context + .i16_type() + .const_int(total_data_length as u64, false); + self.builder + .build_store(data_length_i16_ptr, data_length_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_length: {e}")))?; + + // Write PrintVariableIndexData after InstructionHeader + let variable_data_start = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + inst_buffer, + &[self + .context + .i32_type() + .const_int(std::mem::size_of::() as u64, false)], + "variable_data_start", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get variable_data_start GEP: {e}")) + })? 
+ }; + + // Store var_name_index using correct offset + let var_name_index_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self.context.i32_type().const_int( + std::mem::offset_of!(PrintVariableIndexData, var_name_index) as u64, + false, + )], + "var_name_index_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get var_name_index GEP: {e}")) + })? + }; + let var_name_index_i16_ptr = self + .builder + .build_pointer_cast( + var_name_index_ptr, + self.context.ptr_type(AddressSpace::default()), + "var_name_index_i16_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast var_name_index ptr: {e}")) + })?; + let var_name_index_val = self + .context + .i16_type() + .const_int(var_name_index as u64, false); + self.builder + .build_store(var_name_index_i16_ptr, var_name_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store var_name_index: {e}")))?; + + // Store type_encoding using correct offset + let type_encoding_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self.context.i32_type().const_int( + std::mem::offset_of!(PrintVariableIndexData, type_encoding) as u64, + false, + )], + "type_encoding_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get type_encoding GEP: {e}")) + })? 
+ }; + let type_encoding_val = self + .context + .i8_type() + .const_int(type_encoding as u8 as u64, false); + self.builder + .build_store(type_encoding_ptr, type_encoding_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_encoding: {e}")))?; + + // Store data_len using correct offset + let data_len_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self.context.i32_type().const_int( + std::mem::offset_of!(PrintVariableIndexData, data_len) as u64, + false, + )], + "data_len_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get data_len GEP: {e}")))? + }; + let data_len_i16_ptr = self + .builder + .build_pointer_cast( + data_len_ptr, + self.context.ptr_type(AddressSpace::default()), + "data_len_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast data_len ptr: {e}")))?; + let data_len_val = self.context.i16_type().const_int(data_size as u64, false); // Store as u16 + self.builder + .build_store(data_len_i16_ptr, data_len_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store data_len: {e}")))?; + + // Store type_index using correct offset + let type_index_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self.context.i32_type().const_int( + std::mem::offset_of!(PrintVariableIndexData, type_index) as u64, + false, + )], + "type_index_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to get type_index GEP: {e}")) + })? 
+ }; + let type_index_i16_ptr = self + .builder + .build_pointer_cast( + type_index_ptr, + self.context.ptr_type(AddressSpace::default()), + "type_index_i16_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to cast type_index ptr: {e}")))?; + let type_index_val = self.context.i16_type().const_int(type_index as u64, false); + self.builder + .build_store(type_index_i16_ptr, type_index_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store type_index: {e}")))?; + + // Store status (set to 0) + let status_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self.context.i32_type().const_int( + std::mem::offset_of!(PrintVariableIndexData, status) as u64, + false, + )], + "status_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get status GEP: {e}")))? + }; + let status_val = self + .context + .i8_type() + .const_int(VariableStatus::Ok as u64, false); + self.builder + .build_store(status_ptr, status_val) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to store status: {e}")))?; + + let var_data = self.resolve_variable_value(var_name, type_encoding, Some(status_ptr))?; + + // Store actual variable data after PrintVariableIndexData structure + let var_data_ptr = unsafe { + self.builder + .build_gep( + self.context.i8_type(), + variable_data_start, + &[self + .context + .i32_type() + .const_int(std::mem::size_of::() as u64, false)], + "var_data_ptr", + ) + .map_err(|e| CodeGenError::LLVMError(format!("Failed to get var_data GEP: {e}")))? 
+ }; + + // Store the runtime variable value based on data size + // The var_data contains the LLVM IR value (from register/memory access) + match data_size { + 1 => { + // Store as i8 + let truncated = match var_data { + BasicValueEnum::IntValue(int_val) => self + .builder + .build_int_truncate(int_val, self.context.i8_type(), "truncated_i8") + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to truncate to i8: {e}")) + })?, + _ => { + return Err(CodeGenError::LLVMError( + "Expected integer value for integer type".to_string(), + )); + } + }; + self.builder + .build_store(var_data_ptr, truncated) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store i8 data: {e}")) + })?; + } + 2 => { + // Store as i16 + let truncated = match var_data { + BasicValueEnum::IntValue(int_val) => self + .builder + .build_int_truncate(int_val, self.context.i16_type(), "truncated_i16") + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to truncate to i16: {e}")) + })?, + _ => { + return Err(CodeGenError::LLVMError( + "Expected integer value for integer type".to_string(), + )); + } + }; + let i16_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "i16_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to i16 ptr: {e}")) + })?; + self.builder.build_store(i16_ptr, truncated).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store i16 data: {e}")) + })?; + } + 4 => { + // Store as i32 or f32 + match var_data { + BasicValueEnum::IntValue(int_val) => { + let truncated = self + .builder + .build_int_truncate(int_val, self.context.i32_type(), "truncated_i32") + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to truncate to i32: {e}")) + })?; + let i32_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "i32_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to i32 ptr: {e}")) + 
})?; + self.builder.build_store(i32_ptr, truncated).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store i32 data: {e}")) + })?; + } + BasicValueEnum::FloatValue(float_val) => { + let f32_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "f32_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to f32 ptr: {e}")) + })?; + self.builder.build_store(f32_ptr, float_val).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store f32 data: {e}")) + })?; + } + _ => { + return Err(CodeGenError::LLVMError( + "Expected integer or float value for 4-byte type".to_string(), + )); + } + } + } + 8 => { + // Store as i64, f64, or pointer + match var_data { + BasicValueEnum::IntValue(int_val) => { + let i64_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "i64_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to i64 ptr: {e}")) + })?; + self.builder.build_store(i64_ptr, int_val).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store i64 data: {e}")) + })?; + } + BasicValueEnum::FloatValue(float_val) => { + let f64_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "f64_ptr", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to f64 ptr: {e}")) + })?; + self.builder.build_store(f64_ptr, float_val).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store f64 data: {e}")) + })?; + } + BasicValueEnum::PointerValue(ptr_val) => { + // Store pointer as u64 + let ptr_int = self + .builder + .build_ptr_to_int(ptr_val, self.context.i64_type(), "ptr_as_int") + .map_err(|e| { + CodeGenError::LLVMError(format!( + "Failed to convert ptr to int: {e}" + )) + })?; + let i64_ptr = self + .builder + .build_pointer_cast( + var_data_ptr, + self.context.ptr_type(AddressSpace::default()), + "i64_ptr", + ) + 
.map_err(|e| { + CodeGenError::LLVMError(format!("Failed to cast to i64 ptr: {e}")) + })?; + self.builder.build_store(i64_ptr, ptr_int).map_err(|e| { + CodeGenError::LLVMError(format!("Failed to store pointer data: {e}")) + })?; + } + _ => { + return Err(CodeGenError::LLVMError( + "Expected integer, float, or pointer value for 8-byte type".to_string(), + )); + } + } + } + _ => { + return Err(CodeGenError::LLVMError(format!( + "Unsupported data size: {data_size}" + ))); + } + } + + // Already accumulated; EndInstruction will send the whole event + Ok(()) + } + /// Resolve variable value from script variables first, then DWARF + pub(super) fn resolve_variable_value( + &mut self, + var_name: &str, + type_encoding: TypeKind, + status_ptr: Option>, + ) -> Result> { + info!( + "Resolving variable value: {} ({:?})", + var_name, type_encoding + ); + + // 1) Script variable first + if self.variable_exists(var_name) { + info!("Found script variable for '{}', loading value", var_name); + return self.load_variable(var_name); + } + + // 2) DWARF variable as fallback + match self.query_dwarf_for_variable(var_name)? 
{ + Some(var_info) => { + info!( + "Found DWARF variable read plan: {} availability={:?}", + var_name, var_info.availability + ); + + // Require DWARF type information + var_info.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError(format!( + "Variable '{var_name}' has no type information in DWARF" + )) + })?; + + let compile_context = self.get_compile_time_context()?; + self.variable_read_plan_to_llvm_value( + &var_info, + compile_context.pc_address, + status_ptr, + ) + } + None => { + let compile_context = self.get_compile_time_context()?; + warn!( + "Variable '{}' not found in DWARF at address 0x{:x}", + var_name, compile_context.pc_address + ); + Err(CodeGenError::VariableNotFound(var_name.to_string())) + } + } + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/statements.rs b/ghostscope-compiler/src/ebpf/codegen/statements.rs new file mode 100644 index 0000000..04b914d --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/statements.rs @@ -0,0 +1,338 @@ +use super::*; + +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { + /// Main entry point: compile program with staged transmission system + pub fn compile_program_with_staged_transmission( + &mut self, + program: &Program, + _variable_types: HashMap, + ) -> Result { + info!("Compiling program with staged transmission system"); + + // Step 1: Send TraceEventHeader + self.send_trace_event_header()?; + info!("Sent TraceEventHeader"); + + // Step 2: Send TraceEventMessage with dynamic trace_id + let trace_id = self.current_trace_id.map(|id| id as u64).unwrap_or(0); + self.send_trace_event_message(trace_id)?; + info!("Sent TraceEventMessage"); + + // Reset per-event execution status flags + self.store_flag_value("_gs_any_fail", 0)?; + self.store_flag_value("_gs_any_success", 0)?; + + // Step 3: Process each statement and generate LLVM IR on-demand + let mut instruction_count = 0u16; + for statement in &program.statements { + instruction_count += self.compile_statement(statement)?; + } + + // Step 4: Send 
EndInstruction to mark completion + self.send_end_instruction(instruction_count)?; + info!( + "Sent EndInstruction with {} total instructions", + instruction_count + ); + + // Step 5: Return the trace context for user-space parsing + Ok(self.trace_context.clone()) + } + + /// Compile a statement and return the number of instructions generated + pub fn compile_statement(&mut self, statement: &Statement) -> Result { + debug!("Compiling statement: {:?}", statement); + + match statement { + Statement::AliasDeclaration { name, target } => { + info!("Registering alias variable: {} = {:?}", name, target); + // Declare in current scope (no redeclaration or shadowing) + self.declare_name_in_current_scope(name)?; + self.set_alias_variable(name, target.clone()); + Ok(0) + } + Statement::VarDeclaration { name, value } => { + info!("Processing variable declaration: {} = {:?}", name, value); + // Declare in current scope (no redeclaration or shadowing) + self.declare_name_in_current_scope(name)?; + // Decide whether this is an alias binding (DWARF-backed address/reference) + if self.is_alias_candidate_expr(value) { + self.set_alias_variable(name, value.clone()); + tracing::debug!(var=%name, "Registered DWARF alias variable"); + Ok(0) + } else { + // Compile the value expression and store as concrete variable + // Special-case: string literal and string var copy — record bytes for content printing + match value { + crate::script::Expr::String(s) => { + let mut bytes = s.as_bytes().to_vec(); + bytes.push(0); // NUL terminate for display convenience + self.set_string_variable_bytes(name, bytes); + } + crate::script::Expr::Variable(ref nm) => { + if self + .get_variable_type(nm) + .is_some_and(|t| matches!(t, crate::script::VarType::String)) + { + if let Some(b) = self.get_string_variable_bytes(nm).cloned() { + self.set_string_variable_bytes(name, b); + } + } + } + _ => {} + } + let compiled_value = self.compile_expr(value)?; + // Disallow storing pointer values in script variables, 
except for string literals + if let BasicValueEnum::PointerValue(_) = compiled_value { + // Allow if RHS is a string literal OR a string variable (VarType::String) + let allow_string_var_copy = match value { + crate::script::Expr::String(_) => true, + crate::script::Expr::Variable(ref nm) => self + .get_variable_type(nm) + .is_some_and(|t| matches!(t, crate::script::VarType::String)), + _ => false, + }; + if !allow_string_var_copy { + return Err(CodeGenError::TypeError( + "script variables cannot store pointer values; use DWARF alias (let v = &expr) or keep it as a string".to_string(), + )); + } + } + self.store_variable(name, compiled_value)?; + Ok(0) // VarDeclaration doesn't generate instructions + } + } + Statement::Print(print_stmt) => self.compile_print_statement(print_stmt), + Statement::If { + condition, + then_body, + else_body, + } => { + let entry_event_bytes = self.compile_time_event_bytes_upper_bound; + // Prepare condition context (runtime error capture) + // Pretty expression text for warning + let expr_text = self.expr_to_name(condition); + let expr_index = self.trace_context.add_string(expr_text); + // Activate condition context (compile-time flag) and reset runtime error byte + self.condition_context_active = true; + self.reset_condition_error()?; + + // Compile condition expression + let cond_value = self.compile_expr(condition)?; + + // Convert condition to i1 (boolean) for branching + let cond_bool = match cond_value { + BasicValueEnum::IntValue(int_val) => { + // Convert integer to boolean (non-zero = true) + self.builder + .build_int_compare( + inkwell::IntPredicate::NE, + int_val, + int_val.get_type().const_zero(), + "cond_bool", + ) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to create condition: {e}")) + })? 
+ } + _ => { + return Err(CodeGenError::LLVMError( + "Condition must evaluate to integer".to_string(), + )); + } + }; + + // Get current function from builder + let current_function = self + .builder + .get_insert_block() + .ok_or_else(|| CodeGenError::LLVMError("No current basic block".to_string()))? + .get_parent() + .ok_or_else(|| CodeGenError::LLVMError("No parent function".to_string()))?; + + // Create basic blocks for error/noerror and then/else paths + let then_block = self + .context + .append_basic_block(current_function, "then_block"); + let else_block = self + .context + .append_basic_block(current_function, "else_block"); + let merge_block = self + .context + .append_basic_block(current_function, "merge_block"); + let err_block = self + .context + .append_basic_block(current_function, "cond_err_block"); + let ok_block = self + .context + .append_basic_block(current_function, "cond_ok_block"); + // After cond compiled, deactivate compile-time flag + self.condition_context_active = false; + + // First branch: did runtime errors occur while evaluating the condition? + let cond_err_pred = self.build_condition_error_predicate()?; + self.builder + .build_conditional_branch(cond_err_pred, err_block, ok_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to branch on cond_err: {e}")) + })?; + + // Error path: emit ExprError and decide destination + self.builder.position_at_end(err_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes; + self.emit_current_condition_exprerror(expr_index, "cond")?; + // Decide where to go on error: if else_body is If (else-if), go to else_block to continue; + // otherwise, skip else (suppress) and jump to merge. + let goto_else = matches!(else_body.as_deref(), Some(Statement::If { .. 
})); + let err_path_event_bytes = self.compile_time_event_bytes_upper_bound; + if goto_else { + self.builder + .build_unconditional_branch(else_block) + .map_err(|e| { + CodeGenError::LLVMError(format!( + "Failed to branch to else on error: {e}" + )) + })?; + } else { + self.builder + .build_unconditional_branch(merge_block) + .map_err(|e| { + CodeGenError::LLVMError(format!( + "Failed to branch to merge on error: {e}" + )) + })?; + } + + // No-error path: branch on boolean condition + self.builder.position_at_end(ok_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes; + self.builder + .build_conditional_branch(cond_bool, then_block, else_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to create branch: {e}")) + })?; + + // Build then block + self.builder.position_at_end(then_block); + self.compile_time_event_bytes_upper_bound = entry_event_bytes; + let mut then_instructions = 0u16; + self.enter_scope(); + for stmt in then_body { + then_instructions += self.compile_statement(stmt)?; + } + self.exit_scope(); + let then_event_bytes = self.compile_time_event_bytes_upper_bound; + self.builder + .build_unconditional_branch(merge_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to branch to merge: {e}")) + })?; + + // Build else block + self.builder.position_at_end(else_block); + let else_entry_event_bytes = if goto_else { + entry_event_bytes.max(err_path_event_bytes) + } else { + entry_event_bytes + }; + self.compile_time_event_bytes_upper_bound = else_entry_event_bytes; + let mut else_instructions = 0u16; + if let Some(else_stmt) = else_body { + self.enter_scope(); + else_instructions += self.compile_statement(else_stmt)?; + self.exit_scope(); + } + self.builder + .build_unconditional_branch(merge_block) + .map_err(|e| { + CodeGenError::LLVMError(format!("Failed to branch to merge: {e}")) + })?; + let else_event_bytes = self.compile_time_event_bytes_upper_bound; + + // Continue with merge block + 
self.builder.position_at_end(merge_block); + self.compile_time_event_bytes_upper_bound = if goto_else { + then_event_bytes.max(else_event_bytes) + } else { + then_event_bytes + .max(else_event_bytes) + .max(err_path_event_bytes) + }; + + // Return the maximum instructions from either branch + Ok(std::cmp::max(then_instructions, else_instructions)) + } + Statement::Block(nested_statements) => { + let mut total_instructions = 0u16; + self.enter_scope(); + for stmt in nested_statements { + total_instructions += self.compile_statement(stmt)?; + } + self.exit_scope(); + Ok(total_instructions) + } + Statement::TracePoint { pattern: _, body } => { + let mut total_instructions = 0u16; + // Start a new scope for the trace body + self.enter_scope(); + for stmt in body { + total_instructions += self.compile_statement(stmt)?; + } + self.exit_scope(); + Ok(total_instructions) + } + _ => { + warn!("Unsupported statement type: {:?}", statement); + Ok(0) + } + } + } + + /// Compile print statement and generate LLVM IR on-demand + pub fn compile_print_statement(&mut self, print_stmt: &PrintStatement) -> Result { + info!("Compiling print statement: {:?}", print_stmt); + + match print_stmt { + PrintStatement::String(s) => { + info!("Processing string literal: {}", s); + // 1. Add string to TraceContext + let string_index = self.trace_context.add_string(s.to_string()); + // 2. 
Generate eBPF code for PrintStringIndex + self.generate_print_string_index(string_index)?; + Ok(1) // Generated 1 instruction + } + PrintStatement::Variable(var_name) => { + info!("Processing variable: {}", var_name); + let expr = crate::script::Expr::Variable(var_name.clone()); + let arg = self.resolve_expr_to_arg(&expr)?; + let n = self.emit_print_from_arg(arg)?; + tracing::trace!( + var_name = %var_name, + instructions = n, + "compile_print_statement: emitted via unified resolver" + ); + Ok(n) + } + PrintStatement::ComplexVariable(expr) => { + info!("Processing complex variable: {:?}", expr); + let arg = self.compile_print_expr_with_builtin_exprerror(expr, |ctx| { + ctx.resolve_expr_to_arg(expr) + })?; + let n = self.emit_print_from_arg(arg)?; + tracing::trace!( + instructions = n, + "compile_print_statement: emitted via unified resolver" + ); + Ok(n) + } + PrintStatement::Formatted { format, args } => { + info!( + "Processing formatted print: '{}' with {} args", + format, + args.len() + ); + self.compile_formatted_print(format, args) + } + } + } +} diff --git a/ghostscope-compiler/src/ebpf/codegen/tests.rs b/ghostscope-compiler/src/ebpf/codegen/tests.rs new file mode 100644 index 0000000..9c5d6c0 --- /dev/null +++ b/ghostscope-compiler/src/ebpf/codegen/tests.rs @@ -0,0 +1,620 @@ +use super::*; +use crate::CompileOptions; +use ghostscope_protocol::trace_event::{TraceEventHeader, TraceEventMessage}; + +#[test] +fn print_complex_format_budget_tracks_event_size() { + let bytes_reserved_so_far = + std::mem::size_of::() + std::mem::size_of::(); + let expected = 32768 + - (bytes_reserved_so_far + + std::mem::size_of::() + + std::mem::size_of::()); + assert_eq!( + print_complex_format_instruction_budget(32768, bytes_reserved_so_far), + expected + ); + assert!(print_complex_format_instruction_budget(32768, bytes_reserved_so_far) > 4096); +} + +#[test] +fn print_complex_format_budget_shrinks_after_prior_instructions() { + let bytes_reserved_so_far = + 
std::mem::size_of::() + std::mem::size_of::() + 2048; + let base_budget = print_complex_format_instruction_budget( + 32768, + std::mem::size_of::() + std::mem::size_of::(), + ); + assert_eq!( + print_complex_format_instruction_budget(32768, bytes_reserved_so_far), + base_budget - 2048 + ); +} + +#[test] +fn dynamic_payload_reservations_share_budget_fairly() { + let reservations = allocate_dynamic_payload_reservations(&[256, 256, 256, 256], 512); + assert_eq!(reservations, vec![128, 128, 128, 128]); +} + +#[test] +fn dynamic_payload_reservations_keep_error_headroom_when_possible() { + let reservations = allocate_dynamic_payload_reservations(&[256, 256, 256], 36); + assert_eq!(reservations, vec![12, 12, 12]); +} + +#[test] +fn build_errno_i32_truncates_i64_errors() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); + let fn_type = context.i32_type().fn_type(&[], false); + let function = ctx.module.add_function("errno_test", fn_type, None); + let block = context.append_basic_block(function, "entry"); + ctx.builder.position_at_end(block); + + let errno = ctx + .build_errno_i32( + context.i64_type().const_int((-14i64) as u64, true), + "errno_i32", + ) + .expect("truncate errno"); + assert_eq!(errno.get_type().get_bit_width(), 32); +} + +#[test] +fn computed_int_store_i64_compiles() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = + EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); + // print {} with a pure script integer expression triggers ComputedInt path + let expr = crate::script::Expr::BinaryOp { + left: Box::new(crate::script::Expr::Int(41)), + op: crate::script::BinaryOp::Add, + right: Box::new(crate::script::Expr::Int(1)), + }; + let stmt = + crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable(expr)); 
+ let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "test_func", &[stmt], None, None, None); + assert!(res.is_ok(), "Compilation failed: {:?}", res.err()); +} + +#[test] +fn computed_int_in_format_compiles() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = + EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); + // formatted print with expression argument should also route into ComputedInt path + let expr = crate::script::Expr::BinaryOp { + left: Box::new(crate::script::Expr::Int(1)), + op: crate::script::BinaryOp::Add, + right: Box::new(crate::script::Expr::Int(2)), + }; + let stmt = crate::script::Statement::Print(crate::script::PrintStatement::Formatted { + format: "sum:{}".to_string(), + args: vec![expr], + }); + let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "test_fmt", &[stmt], None, None, None); + assert!(res.is_ok(), "Compilation failed: {:?}", res.err()); +} + +#[test] +fn memcmp_rejects_script_pointer_variable_now() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = + EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext"); + + // let p = "A"; // script pointer to const string (no longer accepted as memcmp arg) + let decl = crate::script::Statement::VarDeclaration { + name: "p".to_string(), + value: crate::script::Expr::String("A".to_string()), + }; + + // if memcmp(p, hex("41"), 1) { print "OK"; } + let if_stmt = crate::script::Statement::If { + condition: crate::script::Expr::BuiltinCall { + name: "memcmp".to_string(), + args: vec![ + crate::script::Expr::Variable("p".to_string()), + crate::script::Expr::BuiltinCall { + name: "hex".to_string(), + args: vec![crate::script::Expr::String("41".to_string())], + }, + crate::script::Expr::Int(1), + ], + }, + then_body: vec![crate::script::Statement::Print( 
+ crate::script::PrintStatement::String("OK".to_string()), + )], + else_body: None, + }; + + let program = crate::script::Program::new(); + let res = ctx.compile_program( + &program, + "test_memcmp_ptr", + &[decl, if_stmt], + None, + None, + None, + ); + assert!( + res.is_err(), + "Expected type error for script pointer variable in memcmp" + ); +} + +#[test] +fn strncmp_requires_string_on_one_side_error_message() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); + + // strncmp(42, 43, 2) -> neither side is string (literal/var); expect type error + let stmt = crate::script::Statement::If { + condition: crate::script::Expr::BuiltinCall { + name: "strncmp".to_string(), + args: vec![ + crate::script::Expr::Int(42), + crate::script::Expr::Int(43), + crate::script::Expr::Int(2), + ], + }, + then_body: vec![crate::script::Statement::Print( + crate::script::PrintStatement::String("OK".to_string()), + )], + else_body: None, + }; + let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "test_strncmp_err", &[stmt], None, None, None); + assert!( + res.is_err(), + "expected error when neither side is string (got {res:?})", + ); + let msg = format!("{:?}", res.err()); + assert!(msg.contains("strncmp requires at least one string argument")); +} + +// No test needed here for string var copy rejection; current semantics allow +// let s = "A"; let p = s; as a string-to-string assignment. 
/// Declaring the same script variable twice in one trace block must fail.
#[test]
fn immutable_variable_redeclaration_rejected() {
    use crate::script::{Expr, Statement};

    let llvm = inkwell::context::Context::create();
    let options = CompileOptions::default();
    let mut ebpf = EbpfContext::new(&llvm, "test_mod", Some(0), &options).expect("ctx");

    // Script: let x = 1; let x = 2;
    let first = Statement::VarDeclaration {
        name: "x".to_string(),
        value: Expr::Int(1),
    };
    let second = Statement::VarDeclaration {
        name: "x".to_string(),
        value: Expr::Int(2),
    };

    let program = crate::script::Program::new();
    let res = ebpf.compile_program(&program, "immut", &[first, second], None, None, None);
    assert!(res.is_err(), "expected immutability error, got {res:?}");

    let msg = format!("{:?}", res.err());
    assert!(
        msg.contains("Redeclaration in the same scope") || msg.contains("immutable variable"),
        "unexpected error msg: {msg}"
    );
}

/// Binding the same alias name twice must fail as well.
#[test]
fn immutable_alias_rebinding_rejected() {
    use crate::script::{Expr, Statement};

    let llvm = inkwell::context::Context::create();
    let options = CompileOptions::default();
    let mut ebpf = EbpfContext::new(&llvm, "test_mod", Some(0), &options).expect("ctx");

    // Script: let p = &arr; let p = &arr;
    let first = Statement::AliasDeclaration {
        name: "p".to_string(),
        target: Expr::AddressOf(Box::new(Expr::Variable("arr".to_string()))),
    };
    let second = Statement::AliasDeclaration {
        name: "p".to_string(),
        target: Expr::AddressOf(Box::new(Expr::Variable("arr".to_string()))),
    };

    let program = crate::script::Program::new();
    let res = ebpf.compile_program(
        &program,
        "immut_alias",
        &[first, second],
        None,
        None,
        None,
    );
    assert!(
        res.is_err(),
        "expected immutability error for alias, got {res:?}"
    );
}

/// `alias + constant` declared with `let` is registered as an alias, so the
/// program compiles.
#[test]
fn alias_to_alias_with_const_offset_is_alias_variable() {
    use crate::script::{BinaryOp, Expr, Statement};

    let llvm = inkwell::context::Context::create();
    let options = CompileOptions::default();
    let mut ebpf = EbpfContext::new(&llvm, "test_mod", Some(0), &options).expect("ctx");

    // Script: let base = &buf[0]; let tail = base + 16;
    let buf_at_zero = Expr::ArrayAccess(
        Box::new(Expr::Variable("buf".to_string())),
        Box::new(Expr::Int(0)),
    );
    let declare_base = Statement::AliasDeclaration {
        name: "base".to_string(),
        target: Expr::AddressOf(Box::new(buf_at_zero)),
    };
    let declare_tail = Statement::VarDeclaration {
        name: "tail".to_string(),
        value: Expr::BinaryOp {
            left: Box::new(Expr::Variable("base".to_string())),
            op: BinaryOp::Add,
            right: Box::new(Expr::Int(16)),
        },
    };

    let program = crate::script::Program::new();
    // `tail` must be treated as an alias (not a value) for this to succeed.
    let res = ebpf.compile_program(
        &program,
        "alias_stage",
        &[declare_base, declare_tail],
        None,
        None,
        None,
    );
    assert!(res.is_ok(), "expected alias-to-alias staging to compile");
}

/// Same as above, with the offset written as the expression `0 - 1`.
#[test]
fn alias_to_alias_with_negative_const_offset_is_alias_variable() {
    use crate::script::{BinaryOp, Expr, Statement};

    let llvm = inkwell::context::Context::create();
    let options = CompileOptions::default();
    let mut ebpf = EbpfContext::new(&llvm, "test_mod", Some(0), &options).expect("ctx");

    // Script: let base = &buf[1]; let head = base + (0 - 1);
    let buf_at_one = Expr::ArrayAccess(
        Box::new(Expr::Variable("buf".to_string())),
        Box::new(Expr::Int(1)),
    );
    let declare_base = Statement::AliasDeclaration {
        name: "base".to_string(),
        target: Expr::AddressOf(Box::new(buf_at_one)),
    };
    let minus_one = Expr::BinaryOp {
        left: Box::new(Expr::Int(0)),
        op: BinaryOp::Subtract,
        right: Box::new(Expr::Int(1)),
    };
    let declare_head = Statement::VarDeclaration {
        name: "head".to_string(),
        value: Expr::BinaryOp {
            left: Box::new(Expr::Variable("base".to_string())),
            op: BinaryOp::Add,
            right: Box::new(minus_one),
        },
    };

    let program = crate::script::Program::new();
    let res = ebpf.compile_program(
        &program,
        "alias_neg_stage",
        &[declare_base, declare_head],
        None,
        None,
        None,
    );
    assert!(
        res.is_ok(),
        "expected alias plus negative literal staging to compile"
    );
}

/// `(p + (0 - 1)) + 3` decomposes into base `p` and a folded index of 2.
#[test]
fn pointer_arithmetic_parts_fold_negative_literal_offsets() {
    use crate::script::{BinaryOp, Expr};

    let minus_one = Expr::BinaryOp {
        left: Box::new(Expr::Int(0)),
        op: BinaryOp::Subtract,
        right: Box::new(Expr::Int(1)),
    };
    let p_minus_one = Expr::BinaryOp {
        left: Box::new(Expr::Variable("p".to_string())),
        op: BinaryOp::Add,
        right: Box::new(minus_one),
    };
    let expr = Expr::BinaryOp {
        left: Box::new(p_minus_one),
        op: BinaryOp::Add,
        right: Box::new(Expr::Int(3)),
    };

    let (base, index) = EbpfContext::<'static, 'static>::pointer_arithmetic_parts(&expr)
        .expect("pointer arithmetic parts");
    assert!(matches!(base, Expr::Variable(name) if name == "p"));
    assert_eq!(index, 2);
}

/// Copying an alias into a new `let` binding keeps it an alias and compiles.
#[test]
fn alias_to_alias_copy_is_alias_variable() {
    use crate::script::{Expr, Statement};

    let llvm = inkwell::context::Context::create();
    let options = CompileOptions::default();
    let mut ebpf = EbpfContext::new(&llvm, "test_mod", Some(0), &options).expect("ctx");

    // Script: let a = &G_STATE.lib; let b = a;
    let g_state_lib = Expr::MemberAccess(
        Box::new(Expr::Variable("G_STATE".to_string())),
        "lib".to_string(),
    );
    let declare_a = Statement::AliasDeclaration {
        name: "a".to_string(),
        target: Expr::AddressOf(Box::new(g_state_lib)),
    };
    let declare_b = Statement::VarDeclaration {
        name: "b".to_string(),
        value: Expr::Variable("a".to_string()),
    };

    let program = crate::script::Program::new();
    let res = ebpf.compile_program(
        &program,
        "alias_copy",
        &[declare_a, declare_b],
        None,
        None,
        None,
    );
    assert!(res.is_ok(), "expected alias-to-alias copy to compile");
}
alias_self_reference_is_rejected_with_cycle_error() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); + + // let a = &a; print a; + let a = crate::script::Statement::AliasDeclaration { + name: "a".to_string(), + target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( + "a".to_string(), + ))), + }; + let p = crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable( + crate::script::Expr::Variable("a".to_string()), + )); + let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "alias_self", &[a, p], None, None, None); + assert!(res.is_err(), "expected cycle error, got {res:?}"); + let msg = format!("{:?}", res.err()); + assert!( + msg.contains("alias cycle") || msg.contains("depth exceeded"), + "unexpected error: {msg}" + ); +} + +#[test] +fn alias_mutual_cycle_is_rejected_with_cycle_error() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); + + // let a = &b; let b = &a; print a; + let a = crate::script::Statement::AliasDeclaration { + name: "a".to_string(), + target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( + "b".to_string(), + ))), + }; + let b = crate::script::Statement::AliasDeclaration { + name: "b".to_string(), + target: crate::script::Expr::AddressOf(Box::new(crate::script::Expr::Variable( + "a".to_string(), + ))), + }; + let p = crate::script::Statement::Print(crate::script::PrintStatement::ComplexVariable( + crate::script::Expr::Variable("a".to_string()), + )); + let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "alias_cycle", &[a, b, p], None, None, None); + assert!(res.is_err(), "expected cycle error, got {res:?}"); + let msg = format!("{:?}", res.err()); + 
assert!( + msg.contains("alias cycle") || msg.contains("depth exceeded"), + "unexpected error: {msg}" + ); +} + +#[test] +fn strncmp_folds_with_script_string_and_literal_true() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); + + // Prepare: let s = "ABC"; + let decl = crate::script::Statement::VarDeclaration { + name: "s".to_string(), + value: crate::script::Expr::String("ABC".to_string()), + }; + let program = crate::script::Program::new(); + let res = ctx.compile_program(&program, "decl", &[decl], None, None, None); + assert!(res.is_ok()); + + // Expression: strncmp(s, "ABD", 2) -> true + let expr = crate::script::Expr::BuiltinCall { + name: "strncmp".to_string(), + args: vec![ + crate::script::Expr::Variable("s".to_string()), + crate::script::Expr::String("ABD".to_string()), + crate::script::Expr::Int(2), + ], + }; + let v = ctx.compile_expr(&expr).expect("compile expr"); + match v { + inkwell::values::BasicValueEnum::IntValue(iv) => { + assert_eq!(iv.get_type().get_bit_width(), 1); + // true expected (string repr may vary across LLVM versions, check both forms) + let s = format!("{iv}"); + assert!(s.contains("i1 true") || s.contains("true")); + } + other => panic!("expected IntValue i1, got {other:?}"), + } +} + +#[test] +fn starts_with_folds_with_two_literals() { + let context = inkwell::context::Context::create(); + let opts = CompileOptions::default(); + let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx"); + + // Expression: starts_with("abcdef", "abc") -> true + let expr = crate::script::Expr::BuiltinCall { + name: "starts_with".to_string(), + args: vec![ + crate::script::Expr::String("abcdef".to_string()), + crate::script::Expr::String("abc".to_string()), + ], + }; + let v = ctx.compile_expr(&expr).expect("compile expr"); + match v { + inkwell::values::BasicValueEnum::IntValue(iv) => { + 
assert_eq!(iv.get_type().get_bit_width(), 1);
+            let s = format!("{iv}");
+            assert!(s.contains("i1 true") || s.contains("true"));
+        }
+        _ => panic!("expected i1"),
+    }
+}
+
+/// `starts_with(1, 2)` passes integer literals on both sides; compiling the call
+/// must fail, and the debug rendering of the error must contain
+/// "starts_with requires at least one string argument".
+#[test]
+fn starts_with_requires_one_string_side_error() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx");
+
+    // Neither side is string
+    let expr = crate::script::Expr::BuiltinCall {
+        name: "starts_with".to_string(),
+        args: vec![crate::script::Expr::Int(1), crate::script::Expr::Int(2)],
+    };
+    let res = ctx.compile_expr(&expr);
+    assert!(res.is_err(), "expected error");
+    // The check is made on the debug text of the error, not on a specific variant.
+    let msg = format!("{:?}", res.err());
+    assert!(msg.contains("starts_with requires at least one string argument"));
+}
+
+/// Declares `x` at the top level and again inside a nested block;
+/// `compile_program` must return an error whose debug text contains
+/// "Shadowing is not allowed" or "shadow".
+#[test]
+fn shadowing_rejected_in_inner_scope() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx");
+
+    // let x = 1; { let x = 2; }
+    let d1 = crate::script::Statement::VarDeclaration {
+        name: "x".to_string(),
+        value: crate::script::Expr::Int(1),
+    };
+    let inner = crate::script::Statement::Block(vec![crate::script::Statement::VarDeclaration {
+        name: "x".to_string(),
+        value: crate::script::Expr::Int(2),
+    }]);
+    let program = crate::script::Program::new();
+    let res = ctx.compile_program(&program, "shadow", &[d1, inner], None, None, None);
+    assert!(res.is_err(), "expected shadowing error");
+    let msg = format!("{:?}", res.err());
+    // Either the full message or any text mentioning "shadow" is accepted.
+    assert!(
+        msg.contains("Shadowing is not allowed") || msg.contains("shadow"),
+        "unexpected: {msg}"
+    );
+}
+
+/// Declares `y` inside a block and prints it after the block has ended;
+/// `compile_program` must return an error.
+///
+/// Only `is_err()` is asserted. As the assertion message says, the failure may be
+/// an out-of-scope error or a missing-analyzer error; the test does not tell them apart.
+#[test]
+fn out_of_scope_use_is_rejected() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let mut ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("ctx");
+
+    // { let y = 2; } print y; -> y is out of scope
+    let block = crate::script::Statement::Block(vec![crate::script::Statement::VarDeclaration {
+        name: "y".to_string(),
+        value: crate::script::Expr::Int(2),
+    }]);
+    let print_y =
+        crate::script::Statement::Print(crate::script::PrintStatement::Variable("y".to_string()));
+    let program = crate::script::Program::new();
+    let res = ctx.compile_program(
+        &program,
+        "out_of_scope",
+        &[block, print_y],
+        None,
+        None,
+        None,
+    );
+    assert!(
+        res.is_err(),
+        "expected out-of-scope or missing analyzer error"
+    );
+}
+
+/// Binds `q` to an integer literal and passes it as the first argument of
+/// `memcmp` inside an `if` condition; `compile_program` must return an error.
+#[test]
+fn memcmp_rejects_bare_integer_pointer_argument() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let mut ctx =
+        EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create EbpfContext");
+
+    // let q = 0xdeadbeef; // integer, not a pointer value
+    let decl = crate::script::Statement::VarDeclaration {
+        name: "q".to_string(),
+        value: crate::script::Expr::Int(0xdeadbeef),
+    };
+
+    // if memcmp(q, hex("00"), 1) { print "X"; }
+    let if_stmt = crate::script::Statement::If {
+        condition: crate::script::Expr::BuiltinCall {
+            name: "memcmp".to_string(),
+            args: vec![
+                crate::script::Expr::Variable("q".to_string()),
+                crate::script::Expr::BuiltinCall {
+                    name: "hex".to_string(),
+                    args: vec![crate::script::Expr::String("00".to_string())],
+                },
+                crate::script::Expr::Int(1),
+            ],
+        },
+        then_body: vec![crate::script::Statement::Print(
+            crate::script::PrintStatement::String("X".to_string()),
+        )],
+        else_body: None,
+    };
+
+    let program = crate::script::Program::new();
+    let res = ctx.compile_program(
+        &program,
+        "test_memcmp_int_ptr",
+        &[decl, if_stmt],
+        None,
+        None,
+        None,
+    );
+    // Only failure is asserted; the error text is not inspected.
+    assert!(res.is_err(), "Expected compilation error but got Ok");
+}
+
+/// Feeds `expr_to_name` a `ChainAccess` made of 50 copies of "错误" and checks
+/// that the result ends with "..." and holds at most 96 `char`s.
+#[test]
+fn expr_to_name_truncates_utf8_safely() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let ctx = EbpfContext::new(&context, "test_mod", Some(0), &opts).expect("create ctx");
+    // Build a long expression comprised of multibyte chars to exceed 96 chars
+    // NOTE(review): the element type looks truncated on the next line (presumably
+    // `Vec<String>`, given the `String`s pushed below) — confirm against the original file.
+    let mut chain: Vec = Vec::new();
+    for _ in 0..50 {
+        // each "错误" is 6 bytes, 2 chars -> quickly exceeds 96 chars
+        chain.push("错误".to_string());
+    }
+    let expr = crate::script::Expr::ChainAccess(chain);
+    let s = ctx.expr_to_name(&expr);
+    // Ensure we got a trailing ellipsis and no panic on multibyte boundary
+    assert!(s.ends_with("..."));
+    // Length is measured in chars, not bytes.
+    assert!(s.chars().count() <= 96);
+}
+
+/// Stores a null pointer as script variable `p` and compiles `p + 1`;
+/// `compile_expr` must fail with an error whose debug text contains
+/// "pointer and integer".
+#[test]
+fn pointer_int_arithmetic_is_rejected_with_friendly_error() {
+    let context = inkwell::context::Context::create();
+    let opts = CompileOptions::default();
+    let mut ctx = EbpfContext::new(&context, "ptr_arith", Some(0), &opts).expect("ctx");
+    // NOTE(review): presumably needed so later calls have a function to emit into — confirm.
+    ctx.create_basic_ebpf_function("f").expect("fn");
+
+    // Create a script variable 'p' of pointer type (null pointer)
+    let ptr_ty = ctx.context.ptr_type(inkwell::AddressSpace::default());
+    let null_ptr = ptr_ty.const_null();
+    ctx.store_variable("p", null_ptr.into()).expect("store ptr");
+
+    // Expression: p + 1
+    let expr = crate::script::Expr::BinaryOp {
+        left: Box::new(crate::script::Expr::Variable("p".to_string())),
+        op: crate::script::BinaryOp::Add,
+        right: Box::new(crate::script::Expr::Int(1)),
+    };
+    let res = ctx.compile_expr(&expr);
+    assert!(res.is_err(), "expected pointer-int arithmetic error");
+    let msg = format!("{:?}", res.err());
+    // The second alternative contains the first, so "pointer and integer" is the effective check.
+    assert!(
+        msg.contains("pointer and integer")
+            || msg.contains("Unsupported operation between pointer and integer"),
+        "unexpected error message: {msg}"
+    );
+}
diff --git a/ghostscope-compiler/src/ebpf/codegen/types.rs b/ghostscope-compiler/src/ebpf/codegen/types.rs
new file mode 100644
index 0000000..1d6ccd9
--- /dev/null
+++ b/ghostscope-compiler/src/ebpf/codegen/types.rs
@@ -0,0 +1,176 @@
+use super::*;
+
+impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> {
+    /// Resolve `var_name` to a `(variable-name index, TypeKind)` pair, checking
+    /// script-defined variables first and DWARF variables second.
+    ///
+    /// For a script variable the type is inferred from the loaded LLVM value via
+    /// `infer_type_from_llvm_value`. For a DWARF variable it is converted from the
+    /// variable's DWARF type with `TypeKind::from`. In both cases the name is
+    /// registered through `trace_context.add_variable_name`, whose result is the
+    /// returned index.
+    ///
+    /// # Errors
+    /// - Propagates failures from `load_variable`, `get_compile_time_context` and
+    ///   `query_dwarf_for_variable`.
+    /// - `CodeGenError::VariableNotFound` when the DWARF query returns `None`.
+    /// - `CodeGenError::DwarfError` when the DWARF result has no type information.
+    ///
+    /// This method is copied from protocol.rs to maintain functionality
+    pub fn resolve_variable_with_priority(&mut self, var_name: &str) -> Result<(u16, TypeKind)> {
+        info!("Resolving variable '{}' with correct priority", var_name);
+
+        // Step 1: Check if it's a script-defined variable first
+        if self.variable_exists(var_name) {
+            info!("Found script variable: {}", var_name);
+
+            // Get the variable's LLVM value to infer type
+            let loaded_value = self.load_variable(var_name)?;
+            let type_encoding = self.infer_type_from_llvm_value(&loaded_value);
+
+            // Add to TraceContext
+            let var_name_index = self.trace_context.add_variable_name(var_name.to_string());
+
+            return Ok((var_name_index, type_encoding));
+        }
+
+        // Step 2: If not found in script variables, try DWARF variables
+        info!(
+            "Variable '{}' not found in script variables, checking DWARF",
+            var_name
+        );
+
+        // Cloned before the query; the copy is only read when building the
+        // not-found error message below.
+        let compile_context = self.get_compile_time_context()?.clone();
+        let read_plan = match self.query_dwarf_for_variable(var_name)? {
+            Some(var) => var,
+            None => {
+                return Err(CodeGenError::VariableNotFound(format!(
+                    "Variable '{}' not found in script or DWARF at PC 0x{:x} in module '{}'",
+                    var_name, compile_context.pc_address, compile_context.module_path
+                )));
+            }
+        };
+
+        // Convert DWARF type information to TypeKind using existing method
+        let dwarf_type = read_plan.dwarf_type.as_ref().ok_or_else(|| {
+            CodeGenError::DwarfError("Variable has no DWARF type information".to_string())
+        })?;
+        let type_encoding = TypeKind::from(dwarf_type);
+
+        // Add to TraceContext (same registration call as the script-variable path)
+        let var_name_index = self.trace_context.add_variable_name(var_name.to_string());
+
+        info!(
+            "DWARF variable '{}' resolved successfully with type: {:?}",
+            var_name, type_encoding
+        );
+
+        Ok((var_name_index, type_encoding))
+    }
+
+    /// Synthesize a DWARF-like TypeInfo for a basic TypeKind (for script variables)
+    ///
+    /// Mapping implemented by the `match` below:
+    /// - `Bool`, `F32`/`F64`, `I8`..`I64`, `U8`..`U64` become a `BaseType` carrying the
+    ///   matching name, byte size and `DW_ATE_*` encoding. `Char` shares the `u8` entry.
+    /// - `Pointer`, `CString`, `String`, `Unknown` and `NullPointer` become an 8-byte
+    ///   `PointerType` whose target is an `UnknownType` named "void".
+    /// - Every other kind falls back to an 8-byte signed `i64` base type.
+    pub(super) fn synthesize_typeinfo_for_typekind(
+        &self,
+        kind: TypeKind,
+    ) -> ghostscope_dwarf::TypeInfo {
+        use ghostscope_dwarf::constants::{
+            DW_ATE_boolean, DW_ATE_float, DW_ATE_signed, DW_ATE_signed_char, DW_ATE_unsigned,
+        };
+        use ghostscope_dwarf::TypeInfo as TI;
+
+        match kind {
+            TypeKind::Bool => TI::BaseType {
+                name: "bool".to_string(),
+                size: 1,
+                encoding: DW_ATE_boolean.0 as u16,
+            },
+            TypeKind::F32 => TI::BaseType {
+                name: "f32".to_string(),
+                size: 4,
+                encoding: DW_ATE_float.0 as u16,
+            },
+            TypeKind::F64 => TI::BaseType {
+                name: "f64".to_string(),
+                size: 8,
+                encoding: DW_ATE_float.0 as u16,
+            },
+            // The 1-byte signed kind uses the signed-char encoding; wider signed
+            // kinds use the plain signed encoding.
+            TypeKind::I8 => TI::BaseType {
+                name: "i8".to_string(),
+                size: 1,
+                encoding: DW_ATE_signed_char.0 as u16,
+            },
+            TypeKind::I16 => TI::BaseType {
+                name: "i16".to_string(),
+                size: 2,
+                encoding: DW_ATE_signed.0 as u16,
+            },
+            TypeKind::I32 => TI::BaseType {
+                name: "i32".to_string(),
+                size: 4,
+                encoding: DW_ATE_signed.0 as u16,
+            },
+            TypeKind::I64 => TI::BaseType {
+                name: "i64".to_string(),
+                size: 8,
+                encoding: DW_ATE_signed.0 as u16,
+            },
+            // `Char` is reported as an unsigned 1-byte "u8", same as `U8`.
+            TypeKind::U8 | TypeKind::Char => TI::BaseType {
+                name: "u8".to_string(),
+                size: 1,
+                encoding: DW_ATE_unsigned.0 as u16,
+            },
+            TypeKind::U16 => TI::BaseType {
+                name: "u16".to_string(),
+                size: 2,
+                encoding: DW_ATE_unsigned.0 as u16,
+            },
+            TypeKind::U32 => TI::BaseType {
+                name: "u32".to_string(),
+                size: 4,
+                encoding: DW_ATE_unsigned.0 as u16,
+            },
+            TypeKind::U64 => TI::BaseType {
+                name: "u64".to_string(),
+                size: 8,
+                encoding: DW_ATE_unsigned.0 as u16,
+            },
+            TypeKind::Pointer | TypeKind::CString | TypeKind::String | TypeKind::Unknown => {
+                // Use void* as a reasonable default for pointers/strings in script land
+                TI::PointerType {
+                    target_type: Box::new(TI::UnknownType {
+                        name: "void".to_string(),
+                    }),
+                    size: 8,
+                }
+            }
+            // Produces the same 8-byte void pointer as the arm above.
+            TypeKind::NullPointer => TI::PointerType {
+                target_type: Box::new(TI::UnknownType {
+                    name: "void".to_string(),
+                }),
+                size: 8,
+            },
+            // Any kind not listed above is described as a signed 64-bit integer.
+            _ => TI::BaseType {
+                name: "i64".to_string(),
+                size: 8,
+                encoding: DW_ATE_signed.0 as u16,
+            },
+        }
+    }
+
+    /// Build the synthesized `TypeInfo` for `kind` with
+    /// `synthesize_typeinfo_for_typekind`, hand it to `trace_context.add_type`,
+    /// and return the `u16` that call yields.
+    pub(super) fn add_synthesized_type_index_for_kind(&mut self, kind: TypeKind) -> u16 {
+        let ti = self.synthesize_typeinfo_for_typekind(kind);
+        self.trace_context.add_type(ti)
+    }
+
+    /// Infer TypeKind from LLVM value type
+    ///
+    /// - Integers map by bit width: 1 is `Bool`; 8, 16, 32 and 64 are the signed
+    ///   kinds `I8`..`I64`; any other width is `I64`.
+    /// - Floats map to `F32` or `F64` by comparing the value's type with the
+    ///   context's `f32_type()` / `f64_type()`; any other float type is `F64`.
+    /// - Pointers map to `Pointer`.
+    /// - Every other value kind maps to `I64`.
+    ///
+    /// Copied from protocol.rs
+    pub(super) fn infer_type_from_llvm_value(&self, value: &BasicValueEnum<'_>) -> TypeKind {
+        match value {
+            BasicValueEnum::IntValue(int_val) => {
+                match int_val.get_type().get_bit_width() {
+                    1 => TypeKind::Bool,
+                    8 => TypeKind::I8, // Default to signed for script variables
+                    16 => TypeKind::I16,
+                    32 => TypeKind::I32,
+                    64 => TypeKind::I64,
+                    _ => TypeKind::I64, // Default fallback
+                }
+            }
+            BasicValueEnum::FloatValue(float_val) => {
+                match float_val.get_type() {
+                    t if t == self.context.f32_type() => TypeKind::F32,
+                    t if t == self.context.f64_type() => TypeKind::F64,
+                    _ => TypeKind::F64, // Default fallback
+                }
+            }
+            BasicValueEnum::PointerValue(_) => TypeKind::Pointer,
+            _ => TypeKind::I64, // Conservative default
+        }
+    }
+}