diff --git a/src/cfa.rs b/src/cfa.rs
new file mode 100644
index 0000000..582b581
--- /dev/null
+++ b/src/cfa.rs
@@ -0,0 +1,100 @@
+use std::borrow::Borrow;
+use std::cell::RefCell;
+use std::collections::HashMap;
+use auxtools::raw_types::values::ValueTag::Area;
+use dmasm::Instruction::Ref;
+use typed_arena::Arena;
+use crate::dmir::DMIR;
+/// Control-flow graph of a DMIR instruction list, keyed by block label.
+pub struct ControlFlowGraph<'t> {
+    analyzer: &'t ControlFlowAnalyzer<'t>,
+    pub nodes: HashMap<String, &'t CFGNode<'t>>
+}
+/// A labelled node together with its inbound (predecessor) and outbound (successor) edges.
+pub struct CFGNode<'t> {
+    pub label: String,
+    pub inbound: RefCell<Vec<&'t CFGNode<'t>>>,
+    pub outbound: RefCell<Vec<&'t CFGNode<'t>>>
+}
+
+impl<'t> CFGNode<'t> {
+    fn new(label: String) -> Self {
+        Self {
+            label,
+            inbound: RefCell::new(vec![]),
+            outbound: RefCell::new(vec![])
+        }
+    }
+    /// Adds the edge `self -> target`: `target` becomes a successor of `self`, `self` a predecessor of `target`.
+    fn add_outbound_edge(&'t self, target: &'t CFGNode<'t>) {
+        self.outbound.borrow_mut().push(target);
+        target.inbound.borrow_mut().push(self);
+    }
+
+}
+/// Owns the arena in which every `CFGNode` of a graph is allocated.
+pub struct ControlFlowAnalyzer<'t> {
+    arena: Arena<CFGNode<'t>>
+}
+
+// `get_or_create_node` returns the node registered for `label`, allocating it in the analyzer's arena on first use.
+impl<'t> ControlFlowGraph<'t> {
+    fn get_or_create_node<'q>(&'q mut self, label: &'_ str) -> &'t CFGNode<'t> {
+        let arena = &self.analyzer.arena;
+        *self.nodes.entry(label.to_owned())
+            .or_insert_with(|| arena.alloc(CFGNode::new(label.to_owned())))
+    }
+}
+
+impl<'t> ControlFlowAnalyzer<'t> {
+    pub fn new() -> Self {
+        Self {
+            arena: Default::default()
+        }
+    }
+    /// Builds the graph for `instructions`, using the synthetic nodes "entry" and "exit".
+    pub fn analyze(&'t self, instructions: &Vec<DMIR>) -> ControlFlowGraph<'t> {
+        let mut graph = ControlFlowGraph {
+            analyzer: self,
+            nodes: Default::default()
+        };
+
+        // Both synthetic nodes are created up front; the walk below starts in "entry".
+        let entry = graph.get_or_create_node("entry");
+        let exit = graph.get_or_create_node("exit");
+
+
+        let mut current_node = entry;
+        // Every edge is attributed to the block most recently opened by `EnterBlock`.
+        for instruction in instructions {
+            match instruction {
+                DMIR::ValueTagSwitch(_, cases) => {
+                    for (_, label) in cases {
+                        current_node.add_outbound_edge(graph.get_or_create_node(label));
+                    }
+                }
+                DMIR::Ret => {
+                    current_node.add_outbound_edge(exit);
+                }
+                DMIR::EnterBlock(label) => { // only switches the current node; no edge is added here
+                    current_node = graph.get_or_create_node(label);
+                }
+                DMIR::JZ(label) |
+                DMIR::JZInternal(label) |
+                DMIR::JNZInternal(label) |
+                DMIR::Jmp(label) => {
+                    current_node.add_outbound_edge(
+                        graph.get_or_create_node(label)
+                    )
+                }
+                DMIR::Deopt(_, _) => {}
+                DMIR::End => {
+                    current_node.add_outbound_edge(exit);
+                }
+                _ => {}
+            }
+        }
+
+        graph
+    }
+}
diff --git a/src/codegen.rs b/src/codegen.rs
index a473dce..a3c7736 100644
--- a/src/codegen.rs
+++ b/src/codegen.rs
@@ -774,7 +774,7 @@ impl<'ctx> CodeGen<'ctx, '_> {
                 let meta_value = self.emit_load_meta_value(value);
                 let mut jumps = Vec::new();
                 let mut default = Option::None;
-                for (predicate, block) in cases.as_ref() {
+                for (predicate, block) in cases {
                     let mut predicates = Vec::new();
                     fn to_linear(predicate: &ValueTagPredicate, out: &mut Vec) {
                         match predicate {
diff --git a/src/compile.rs b/src/compile.rs
index ce44042..1c161bf 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -21,8 +21,10 @@ use llvm_sys::execution_engine::LLVMExecutionEngineGetErrMsg;
 
 use crate::{ByondProcFunc, chad_hook_by_id, DisassembleEnv, dmir, guard, pads};
 use crate::codegen::CodeGen;
+use crate::dfa::analyze_and_dump_dfa;
 use crate::dmir::DMIR;
 use crate::proc_meta::{ProcMeta, ProcMetaModuleBuilder};
+use crate::ref_count2::generate_ref_count_operations2;
 use crate::ref_count::generate_ref_count_operations;
 use crate::section_memory_manager_bindings::{Section, SectionMemoryManager};
 use crate::stack_map::{read_stack_map, StackMap};
@@ -251,6 +253,9 @@ fn compile_proc<'ctx>(
     log::debug!("DMIR created");
     variable_termination_pass(&mut irs);
     log::debug!("variable_termination_pass done");
+    // New data-flow based pass; at this point it only logs what it computed and leaves `irs` untouched.
+    generate_ref_count_operations2(&mut irs, proc.parameter_names().len());
+    log::debug!("====== DFA done ======");
     generate_ref_count_operations(&mut irs, proc.parameter_names().len());
     log::debug!("ref_count_pass done");
 
diff --git a/src/dfa.rs b/src/dfa.rs
new file mode 100644
index 0000000..f9b82b6
--- /dev/null
+++ b/src/dfa.rs
@@ -0,0 +1,674 @@
+use std::borrow::BorrowMut;
+use std::cell::{Cell, RefCell};
+use std::collections::HashMap;
+use std::env::var;
+use typed_arena::Arena;
+use crate::dmir::{DMIR};
+use DFValueLocation::*;
+use FlowVariableExpression::*;
+use std::fmt::Write;
+// This module provides data-flow analysis capability.
+/// Abstract machine state: the flow variable currently held by each argument, stack slot, local and the cache.
+#[derive(Clone)]
+pub struct InterpreterState<'t> {
+    pub arguments: Vec<&'t FlowVariable<'t>>,
+    pub stack: Vec<&'t FlowVariable<'t>>,
+    pub locals: HashMap<u32, &'t FlowVariable<'t>>,
+    pub cache: Option<&'t FlowVariable<'t>>,
+}
+/// Storage slot a flow variable lives in.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub enum DFValueLocation {
+    Local(u32),
+    Cache,
+    Argument(u32),
+    Stack(u8),
+}
+
+impl InterpreterState<'_> {
+    fn new() -> Self {
+        Self {
+            arguments: vec![],
+            stack: vec![],
+            locals: Default::default(),
+            cache: None,
+        }
+    }
+}
+/// How the value of a `FlowVariable` is produced.
+pub enum FlowVariableExpression<'t> {
+    /// Re-assign of different variable
+    Variable(&'t FlowVariable<'t>),
+    /// Merge of different variables in SSA
+    Phi(RefCell<Vec<&'t FlowVariable<'t>>>),
+    /// Opaque read, for example return of call
+    In,
+}
+/// A single definition of a value at `location`, together with every use recorded for it.
+pub struct FlowVariable<'t> {
+    pub location: DFValueLocation,
+    pub expression: FlowVariableExpression<'t>,
+    pub uses: RefCell<Vec<FlowVariableUse<'t>>>,
+}
+
+use std::hash::{Hash, Hasher};
+use itertools::Itertools;
+use crate::cfa::{ControlFlowAnalyzer, ControlFlowGraph};
+use crate::dmir_annotate::Annotator;
+use crate::ref_count::ref_identity;
+ref_identity!(FlowVariable<'_>);
+ref_identity!(FlowVariableConsume<'_>);
+/// Terminal use of a variable.
+pub enum FlowVariableConsume<'t> {
+    /// Consume without using variable
+    Unset(&'t FlowVariable<'t>),
+    /// Generic opaque write, for example write to global variable
+    Out(&'t FlowVariable<'t>),
+}
+/// One recorded use of a variable: a terminal consume, or a move/rename into another variable.
+pub enum FlowVariableUse<'t> {
+    Consume(&'t FlowVariableConsume<'t>),
+    Move(&'t FlowVariable<'t>),
+    Rename(&'t FlowVariable<'t>)
+}
+/// Variables created and consumes emitted by one instruction, plus the stack size after it.
+pub struct OperationEffect<'t> {
+    pub variables: Vec<&'t FlowVariable<'t>>,
+    pub consumes: Vec<&'t FlowVariableConsume<'t>>,
+    pub stack_size: usize,
+}
+/// Allocates flow variables and consumes, and interprets DMIR instructions against an `InterpreterState`.
+pub struct DataFlowAnalyzer<'t, 'graph> {
+    arena: Arena<FlowVariable<'t>>,
+    consumes_arena: Arena<FlowVariableConsume<'t>>,
+    control_flow_graph: &'graph ControlFlowGraph<'graph>,
+}
+/// Running analysis of one function: the current state plus the entry state saved per block label.
+pub struct DataFlowAnalyzerState<'t, 'graph> {
+    analyzer: &'t DataFlowAnalyzer<'t, 'graph>,
+    state: InterpreterState<'t>,
+    blocks: HashMap<String, InterpreterState<'t>>,
+}
+
+impl<'t, 'graph> DataFlowAnalyzerState<'t, 'graph> {
+    pub fn analyze_instruction(&mut self, ir: &DMIR) -> OperationEffect<'t> {
+        self.analyzer.analyze_instruction(
+            &mut self.state,
+            &mut self.blocks,
+            ir
+        )
+    }
+
+    pub fn state<'q>(&'q self) -> &'q InterpreterState<'t> {
+        &self.state
+    }
+    /// Variable `idx` slots below the top of the stack (0 is the top); panics if the stack is too shallow.
+    pub fn stack_top<'q>(&'q self, idx: usize) -> &'t FlowVariable<'t> {
+        let stack_size = self.state.stack.len();
+        self.state.stack[stack_size - idx - 1]
+    }
+    /// Not implemented yet: the body was left empty, so calling this panics.
+    pub fn rename<'q>(&'q mut self, variable: &'t FlowVariable<'t>) -> &'t FlowVariable<'t> {
+        todo!("DataFlowAnalyzerState::rename is not implemented yet")
+    }
+}
+
+impl<'t, 'graph> DataFlowAnalyzer<'t, 'graph> {
+    pub fn new(control_flow_graph: &'graph ControlFlowGraph<'graph>) -> Self {
+        Self {
+            arena: Arena::new(),
+            consumes_arena: Arena::new(),
+            control_flow_graph,
+        }
+    }
+    /// Allocates a variable, installs it into `state` at `location` and records a `Move` use on its source, if any.
+    fn create_variable<'q>(
+        &'t self,
+        location: DFValueLocation,
+        expression: FlowVariableExpression<'t>,
+        state: &'q mut InterpreterState<'t>,
+    ) -> &'t FlowVariable<'t> {
+        let variable = self.arena.alloc(
+            FlowVariable {
+                location,
+                expression,
+                uses: RefCell::new(vec![]),
+            }
+        );
+
+        match &variable.location {
+            Stack(idx) => {
+                if *idx as usize == state.stack.len() {
+                    state.stack.push(variable)
+                } else {
+                    state.stack[*idx as usize] = variable;
+                }
+            }
+            Cache => {
+                state.cache = Some(variable);
+            }
+            Local(idx) => {
+                assert!(state.locals.insert(*idx, variable).is_none());
+            }
+            Argument(idx) => {
+                state.arguments[*idx as usize] = variable;
+            }
+        }
+
+        match &variable.expression {
+            Variable(var) => var.uses.borrow_mut().push(FlowVariableUse::Move(variable)),
+            _ => {}
+        }
+
+        return variable;
+    }
+    /// Creates one opaque `In` variable per argument and returns the entry effect with a fresh analysis state.
+    pub fn enter_function(&'t self, argument_count: u32) -> (OperationEffect<'t>, DataFlowAnalyzerState<'t, 'graph>) {
+        let mut state = InterpreterState::new();
+
+        for idx in 0..argument_count {
+            state.arguments.push(
+                self.arena.alloc(
+                    FlowVariable {
+                        location: Argument(idx),
+                        expression: In,
+                        uses: RefCell::new(vec![]),
+                    }
+                )
+            );
+        }
+        let enter_effect = OperationEffect {
+            variables: state.arguments.clone(),
+            consumes: vec![],
+            stack_size: 0,
+        };
+
+        (enter_effect, DataFlowAnalyzerState {
+            analyzer: self,
+            state,
+            blocks: Default::default()
+        })
+    }
+    /// Emits an `Unset` consume for every local, every argument and the cache, draining them from `state`.
+    fn epilogue_effect(&'t self, state: &mut InterpreterState<'t>, effect: &mut OperationEffect<'t>) {
+        for (_, var) in state.locals.drain() {
+            effect.consumes.push(self.create_consume(FlowVariableConsume::Unset(var)));
+        }
+        for var in state.arguments.drain(..) {
+            effect.consumes.push(self.create_consume(FlowVariableConsume::Unset(var)));
+        }
+        if let Some(var) = &state.cache.take() {
+            effect.consumes.push(self.create_consume(FlowVariableConsume::Unset(var)));
+        }
+    }
+    /// Appends `source` to the `Phi` `target` and records the move on `source`; panics if `target` is not a `Phi`.
+    fn merge_phi(target: &'t FlowVariable<'t>, source: &'t FlowVariable<'t>) {
+        match &target.expression {
+            Phi(sources) => {
+                source.uses.borrow_mut().push(FlowVariableUse::Move(target));
+                sources.borrow_mut().push(source);
+            }
+            _ => panic!("unexpected target for Phi merging")
+        }
+    }
+    fn make_phi(&'t self, source: &'t FlowVariable<'t>) -> &'t FlowVariable<'t> {
+        self.arena.alloc(
+            FlowVariable {
+                location: source.location.clone(),
+                expression: Phi(RefCell::new(vec![source])), // NOTE(review): unlike merge_phi, no use is recorded on `source` here - confirm this is intended
+                uses: RefCell::new(vec![]),
+            }
+        )
+    }
+    /// Allocates `consume` and registers it as a use of the variable it refers to.
+    fn create_consume(&'t self, consume: FlowVariableConsume<'t>) -> &'t FlowVariableConsume<'t> {
+        let allocated: &FlowVariableConsume = self.consumes_arena.alloc(consume);
+        match allocated {
+            FlowVariableConsume::Unset(variable) |
+            FlowVariableConsume::Out(variable) => {
+                variable.uses.borrow_mut().push(FlowVariableUse::Consume(allocated));
+            }
+        }
+        return allocated;
+    }
+    /// True when the CFG node for `label` has fewer than two inbound edges; panics if the label is unknown.
+    fn block_has_single_predecessor(&self, label: &str) -> bool {
+        return self.control_flow_graph.nodes.get(label).unwrap().inbound.borrow().len() < 2;
+    }
+    /// Propagates `current_state` into the entry state of `label`: cloned for single-predecessor blocks, merged through phis otherwise.
+    fn merge_block(
+        &'t self,
+        current_state: &InterpreterState<'t>,
+        blocks: &mut HashMap<String, InterpreterState<'t>>,
+        label: &str,
+    ) {
+        let has_single_predecessor = self.block_has_single_predecessor(label);
+        if has_single_predecessor {
+            assert!(blocks.insert(label.to_owned(), current_state.clone()).is_none())
+        } else {
+            blocks.entry(label.to_owned())
+                .and_modify(|state| {
+                    state.arguments.iter().zip(current_state.arguments.iter()).for_each(|(target, source)| Self::merge_phi(target, source));
+                    state.stack.iter().zip(current_state.stack.iter()).for_each(|(target, source)| Self::merge_phi(target, source));
+                    assert_eq!(state.locals.len(), current_state.locals.len());
+                    for (idx, var) in current_state.locals.iter() {
+                        Self::merge_phi(state.locals[idx], var);
+                    }
+                    state.cache.iter().zip(current_state.cache.iter()).for_each(|(target, source)| Self::merge_phi(target, source));
+                })
+                .or_insert_with(|| {
+                    InterpreterState {
+                        arguments: current_state.arguments.iter().map(|var| self.make_phi(var)).collect(),
+                        stack: current_state.stack.iter().map(|var| self.make_phi(var)).collect(),
+                        locals: current_state.locals.iter().map(|(idx, var)| (*idx, self.make_phi(var))).collect(),
+                        cache: current_state.cache.map(|var| self.make_phi(var)),
+                    }
+                });
+        }
+    }
+    /// Applies one instruction to `state` and returns the variables it defined and the consumes it performed.
+    fn analyze_instruction<'q>(
+        &'t self,
+        state: &'q mut InterpreterState<'t>,
+        blocks: &'q mut HashMap<String, InterpreterState<'t>>,
+        instruction: &'q DMIR,
+    ) -> OperationEffect<'t> {
+        let mut effect = OperationEffect { variables: vec![], consumes: vec![], stack_size: 0 };
+
+        macro_rules! mk_var {
+            ($loc:expr => $expr:expr) => {
+                let v = self.create_variable($loc, $expr, state);
+                effect.variables.push(v);
+            };
+        }
+        macro_rules! stack_top {
+            () => { stack_top!(0) };
+            ($loc:expr) => { DFValueLocation::Stack((state.stack.len() - $loc) as u8) };
+        }
+        macro_rules! unset {
+            ($var:expr) => {
+                effect.consumes.push(
+                    self.create_consume(FlowVariableConsume::Unset($var))
+                )
+            };
+        }
+        macro_rules! out {
+            ($var:expr) => {
+                effect.consumes.push(
+                    self.create_consume(FlowVariableConsume::Out($var))
+                )
+            };
+        }
+
+        match instruction {
+            DMIR::GetLocal(idx) => {
+                mk_var!(stack_top!() => Variable(state.locals[idx]));
+            }
+            DMIR::SetLocal(idx) => {
+                if let Some(var) = state.locals.remove(idx) {
+                    unset!(var)
+                }
+                mk_var!(Local(*idx) => Variable(state.stack.pop().unwrap()));
+            }
+            DMIR::GetSrc => {
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::GetArg(idx) => {
+                mk_var!(stack_top!() => Variable(state.arguments[*idx as usize]));
+            }
+            DMIR::SetArg(idx) => {
+                unset!(state.arguments[*idx as usize]);
+                mk_var!(Argument(*idx) => Variable(state.stack.pop().unwrap()));
+            }
+            DMIR::SetCache => {
+                if let Some(var) = state.cache.take() {
+                    unset!(var)
+                }
+                mk_var!(Cache => Variable(state.stack.pop().unwrap()));
+            }
+            DMIR::GetCacheField(_) => {
+                out!(state.cache.unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::SetCacheField(_) => {
+                out!(state.cache.unwrap());
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::PushCache => {
+                mk_var!(stack_top!() => Variable(state.cache.unwrap()));
+            }
+            DMIR::ValueTagSwitch(_, cases) => {
+                for (_, label) in cases {
+                    self.merge_block(
+                        state,
+                        blocks,
+                        label,
+                    );
+                }
+            }
+            DMIR::RoundN | DMIR::FloatCmp(_) | DMIR::FloatAdd | DMIR::FloatSub | DMIR::FloatMul | DMIR::FloatDiv => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::FloatAbs => {
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::FloatInc | DMIR::FloatDec => {
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::BitAnd | DMIR::BitOr => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::ListCheckSizeDeopt(_, _, _) => {}
+            DMIR::ListCopy => {
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::ListSubSingle | DMIR::ListAddSingle => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::ListAssociativeGet | DMIR::ListIndexedGet => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::ListAssociativeSet | DMIR::ListIndexedSet => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::NewVectorList(size) => {
+                for _ in 0..*size {
+                    out!(state.stack.pop().unwrap());
+                }
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::NewAssocList(size, _) => {
+                for _ in 0..*size {
+                    out!(state.stack.pop().unwrap());
+                    out!(state.stack.pop().unwrap());
+                }
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::ArrayIterLoadFromObject(_) | DMIR::ArrayIterLoadFromList(_) => {
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::IterAllocate => {}
+            DMIR::IterPop => {}
+            DMIR::IterPush => {}
+            DMIR::IterNext => {
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::GetStep => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::PushInt(_) | DMIR::PushVal(_) => {
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::PushTestFlag => {
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::SetTestFlag(_) => {}
+            DMIR::Pop => {
+                unset!(state.stack.pop().unwrap());
+            }
+            DMIR::Ret => {
+                out!(state.stack.pop().unwrap());
+                self.epilogue_effect(state, &mut effect);
+            }
+            DMIR::Not => {
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::Test => {
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::TestEqual => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::TestIsDMEntity => {
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::IsSubtypeOf => {
+                out!(state.stack.pop().unwrap());
+                out!(state.stack.pop().unwrap());
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::JZ(label) => {
+                self.merge_block(
+                    state,
+                    blocks,
+                    label,
+                );
+            }
+            DMIR::Dup => {
+                let stack_top = state.stack.pop().unwrap();
+                mk_var!(stack_top!() => Variable(stack_top));
+                mk_var!(stack_top!() => Variable(stack_top));
+            }
+            DMIR::DupX1 => {
+                let b = state.stack.pop().unwrap();
+                let a = state.stack.pop().unwrap();
+                mk_var!(stack_top!() => Variable(b));
+                mk_var!(stack_top!() => Variable(a));
+                mk_var!(stack_top!() => Variable(b));
+            }
+            DMIR::DupX2 => {
+                let c = state.stack.pop().unwrap();
+                let b = state.stack.pop().unwrap();
+                let a = state.stack.pop().unwrap();
+                mk_var!(stack_top!() => Variable(c));
+                mk_var!(stack_top!() => Variable(a));
+                mk_var!(stack_top!() => Variable(b));
+                mk_var!(stack_top!() => Variable(c));
+            }
+            DMIR::Swap => {
+                let b = state.stack.pop().unwrap();
+                let a = state.stack.pop().unwrap();
+
+                mk_var!(stack_top!() => Variable(b));
+                mk_var!(stack_top!() => Variable(a));
+            }
+            DMIR::SwapX1 => {
+                let c = state.stack.pop().unwrap();
+                let b = state.stack.pop().unwrap();
+                let a = state.stack.pop().unwrap();
+
+                mk_var!(stack_top!() => Variable(b));
+                mk_var!(stack_top!() => Variable(c));
+                mk_var!(stack_top!() => Variable(a));
+            }
+            DMIR::TestInternal => {
+                out!(state.stack.pop().unwrap());
+            }
+            DMIR::Jmp(label) | DMIR::JZInternal(label) | DMIR::JNZInternal(label) => {
+                self.merge_block(state, blocks, label);
+            }
+            DMIR::EnterBlock(lbl) => {
+                let block = blocks.get(lbl).expect("Block not initialized");
+                // replace state completely
+                *state = block.clone();
+                if !self.block_has_single_predecessor(lbl) {
+                    effect.variables.append(
+                        &mut block.arguments.clone()
+                    );
+                    effect.variables.extend(
+                        block.stack.iter()
+                    );
+                    effect.variables.extend(
+                        block.locals.values()
+                    );
+                    if let Some(var) = block.cache {
+                        effect.variables.push(var);
+                    };
+                }
+            }
+            DMIR::InfLoopCheckDeopt(_) => {}
+            DMIR::Deopt(_, _) => {}
+            DMIR::CheckTypeDeopt(_, _, _) => {}
+            DMIR::CallProcByName(_, _, arg_count) | DMIR::CallProcById(_, _, arg_count) => {
+                let src = state.stack.pop().unwrap();
+                out!(src);
+                for _ in 0..*arg_count {
+                    out!(state.stack.pop().unwrap());
+                }
+                mk_var!(stack_top!() => In);
+            }
+            DMIR::NewDatum(arg_count) => {
+                let arg_count = *arg_count as usize;
+                let pos = state.stack.len() - 1 - arg_count;
+                out!(state.stack[pos]);
+                mk_var!(stack_top!(arg_count) => In);
+                mk_var!(stack_top!() => Variable(state.stack[pos]));
+            }
+            DMIR::IncRefCount { .. } => {} // todo
+            DMIR::DecRefCount { .. } => {} // todo
+            DMIR::Nop => {}
+            DMIR::UnsetLocal(idx) => unset!(state.locals.remove(idx).unwrap()),
+            DMIR::UnsetCache => unset!(state.cache.take().unwrap()),
+            DMIR::End => {
+                self.epilogue_effect(state, &mut effect);
+            }
+        }
+
+        effect.stack_size = state.stack.len();
+        effect
+    }
+}
+/// Builds the CFG and a data-flow analyzer for `instructions`; the analysis and dump steps are currently commented out.
+pub fn analyze_and_dump_dfa(instructions: &Vec<DMIR>, argument_count: u32) {
+    let cfa = ControlFlowAnalyzer::new();
+    let graph = cfa.analyze(&instructions);
+
+    let mut analyzer = DataFlowAnalyzer::new(&graph);
+
+    // let r = analyzer.analyze(
+    //     &instructions,
+    //     argument_count,
+    // );
+
+    // dump_dfa(instructions, &r);
+}
+/// Annotates `instructions` with the effects in `data_flow_info` (entry 0 is skipped) and dumps them via `Annotator`.
+pub fn dump_dfa(instructions: &Vec<DMIR>, data_flow_info: &Vec<OperationEffect>) {
+
+    let mut next_id = 0u32;
+    let mut var_ids = HashMap::new();
+    let mut consume_ids = HashMap::new();
+    let mut idx_by_id = HashMap::new();
+
+    for (idx, effect) in data_flow_info.iter().enumerate() {
+        for variable in &effect.variables {
+            let entry = var_ids.entry(*variable).or_insert_with(|| {
+                let v = next_id;
+                next_id += 1;
+                v
+            });
+            idx_by_id.insert(*entry, idx);
+        }
+        for consume in &effect.consumes {
+            let entry = consume_ids.entry(*consume).or_insert_with(|| {
+                let v = next_id;
+                next_id += 1;
+                v
+            });
+            idx_by_id.insert(*entry, idx);
+        }
+    }
+
+    let mut annotator = Annotator::new();
+
+
+    for (idx, effect) in data_flow_info.iter().skip(1).enumerate() {
+        for consume in &effect.consumes {
+            let str = match consume {
+                FlowVariableConsume::Unset(var) => {
+                    format!("unset %{}", var_ids[var])
+                }
+                FlowVariableConsume::Out(var) => {
+                    format!("out %{}", var_ids[var])
+                }
+            };
+            annotator.add(idx, str)
+        }
+        for variable in &effect.variables {
+            let mut str = match &variable.expression {
+                Variable(other) => format!("%{} = %{}", var_ids[variable], var_ids[other]),
+                Phi(other) => format!("%{} = φ[{}]", var_ids[variable], other.borrow().iter().map(|var| format!("%{}", var_ids[var])).join(", ")),
+                In => format!("%{} = in", var_ids[variable])
+            };
+
+            if variable.uses.borrow().len() > 0 {
+                write!(&mut str, " - used ({})",
+                       variable.uses.borrow().iter().format_with(
+                           ", ",
+                           |el, f| {
+                               match el {
+                                   FlowVariableUse::Consume(consume) => {
+                                       f(&format_args!("{}", idx_by_id[&consume_ids[consume]] - 1))
+                                   }
+                                   FlowVariableUse::Move(var) | FlowVariableUse::Rename(var) => {
+                                       f(&format_args!("{}", idx_by_id[&var_ids[var]] - 1))
+                                   }
+                               }
+                           },
+                       )
+                ).unwrap();
+            }
+            annotator.add(idx, str);
+        }
+    }
+
+    annotator.dump_annotated(&instructions);
+}
+#[cfg(test)]
+mod tests {
+    use std::borrow::Borrow;
+    use std::collections::HashMap;
+    use auxtools::raw_types::procs::ProcId;
+    use dmasm::format;
+    use dmasm::operands::ValueOpRaw;
+    use itertools::Itertools;
+    use libc::fopen;
+    use log::LevelFilter;
+    use crate::cfa::ControlFlowAnalyzer;
+    use crate::dfa::{analyze_and_dump_dfa, DataFlowAnalyzer, FlowVariable, FlowVariableConsume, FlowVariableExpression};
+    use crate::dmir::DMIR;
+    use crate::dmir_annotate::Annotator;
+
+    #[test]
+    fn test_dfa_unbalanced_wtf() {
+        simple_logging::log_to_stderr(LevelFilter::Trace);
+
+
+        fn push_null() -> DMIR {
+            DMIR::PushVal(ValueOpRaw { tag: 0, data: 0 })
+        }
+        let instructions = vec![
+            push_null(),
+            DMIR::CallProcById(ProcId(0), 0, 0),
+            DMIR::SetLocal(0),
+            DMIR::GetLocal(0),
+            DMIR::SetLocal(1),
+            push_null(),
+            DMIR::SetLocal(0),
+            DMIR::GetLocal(1),
+            push_null(),
+            DMIR::CallProcById(ProcId(1), 0, 1),
+            DMIR::Pop,
+            DMIR::End,
+        ];
+
+        analyze_and_dump_dfa(&instructions, 0);
+        //panic!();
+    }
+}
diff --git a/src/dmir.rs b/src/dmir.rs
index 3ece702..6a0922f 100755
--- a/src/dmir.rs
+++ b/src/dmir.rs
@@ -22,7 +22,7 @@ pub enum DMIR {
     GetCacheField(u32),
     SetCacheField(u32),
     PushCache,
-    ValueTagSwitch(ValueLocation, Box<Vec<(ValueTagPredicate, String)>>),
+    ValueTagSwitch(ValueLocation, Vec<(ValueTagPredicate, String)>),
     FloatAdd,
     FloatSub,
     FloatMul,
@@ -243,11 +243,14 @@ fn build_float_bin_op_deopt(action: DMIR, data: &DebugData, proc: &Proc, out: &m
 fn decode_switch(value: ValueLocation, switch_id: &mut u32, cases: Vec<(ValueTagPredicate, Vec<DMIR>)>, out: &mut Vec<DMIR>) {
     let switch_exit = format!("switch_{}_exit", switch_id);
     let (predicates, blocks): (Vec<_>, Vec<_>) = cases.into_iter().unzip();
-    out.push(DMIR::ValueTagSwitch(value, Box::new(
-        predicates.into_iter().enumerate().map(
-            |(index, predicate)| (predicate, format!("switch_{}_case_{}", switch_id, index))
-        ).collect()
-    )));
+    out.push(
+        DMIR::ValueTagSwitch(
+            value,
+            predicates.into_iter().enumerate().map(
+                |(index, predicate)| (predicate, format!("switch_{}_case_{}", switch_id, index))
+            ).collect()
+        )
+    );
     let mut case_counter = 0;
     for mut instructions in blocks {
         out.push(EnterBlock(format!("switch_{}_case_{}", switch_id, case_counter)));
diff --git a/src/lib.rs b/src/lib.rs
index 2d7fc2b..ec45723 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -26,6 +26,9 @@ mod test_utils;
 pub(crate) mod stack_map;
 mod section_memory_manager_bindings;
 pub(crate) mod proc_meta;
+pub(crate) mod dfa;
+pub(crate) mod cfa;
+pub(crate) mod ref_count2;
 
 
 #[macro_use]
diff --git a/src/ref_count.rs b/src/ref_count.rs
index d483867..1a65c55 100755
--- a/src/ref_count.rs
+++ b/src/ref_count.rs
@@ -38,6 +38,8 @@ macro_rules! ref_identity {
     };
 }
 
+pub(crate) use ref_identity;
+
 ref_identity!(RValue<'_>);
 
 /// Denotes value drains
@@ -326,7 +328,7 @@ impl<'t> Analyzer<'t> {
                 );
             }
             DMIR::ValueTagSwitch(_, cases) => {
-                for (_, block) in cases.as_ref() {
+                for (_, block) in cases {
                     Analyzer::merge_block(
                         &self.stack,
                         &self.cache,
diff --git a/src/ref_count2.rs b/src/ref_count2.rs
new file mode 100644
index 0000000..4946a93
--- /dev/null
+++ b/src/ref_count2.rs
@@ -0,0 +1,388 @@
+use std::borrow::Borrow;
+use std::cell::RefCell;
+use std::cmp::max;
+use std::collections::{HashMap, HashSet};
+use std::env::var;
+use itertools::Itertools;
+use log::log;
+use typed_arena::Arena;
+use FlowVariableConsume::{Out, Unset};
+use RefCountOp::ProduceUncounted;
+use RefOpDisposition::{Post, Pre};
+use crate::cfa::ControlFlowAnalyzer;
+use crate::dfa;
+use crate::dfa::{analyze_and_dump_dfa, DataFlowAnalyzer, DataFlowAnalyzerState, DFValueLocation, dump_dfa, FlowVariable, FlowVariableConsume, FlowVariableExpression, OperationEffect};
+use crate::dmir::{DMIR, RefOpDisposition, ValueLocation};
+use crate::dmir::DMIR::{DecRefCount, IncRefCount};
+use crate::dmir::RefOpDisposition::DupPost;
+use crate::dmir::ValueLocation::{Argument, Cache, Local, Stack};
+use crate::inventory::iter;
+use crate::ref_count2::RefCountOp::{Dec, Inc, Instruction, Split};
+
+
+enum RefCountOpNode<'t> {
+    Source(&'t FlowVariable<'t>),
+    Op(&'t RefCountOpNode<'t>, usize)
+}
+/// Runs the data-flow based ref-count analysis over `ir` and logs the resulting operations; `ir` is only read.
+pub fn generate_ref_count_operations2(
+    ir: &mut Vec<DMIR>,
+    parameter_count: usize,
+) {
+
+    let cfa = ControlFlowAnalyzer::new();
+    let graph = cfa.analyze(ir);
+
+    let mut analyzer = DataFlowAnalyzer::new(&graph);
+
+    let (_, mut dfa_state) = analyzer.enter_function(parameter_count as u32);
+
+    let mut ops = vec![];
+    for (idx, instruction) in ir.iter().enumerate() {
+        analyze_instruction(idx, instruction, &mut dfa_state, &mut ops);
+    }
+
+    let mut var_idx = VarIdx { var_id: Default::default(), next: 0 };
+
+
+    // let arena = typed_arena::Arena::new();
+    // Maps every root variable (see `unwrap_var`) to the indices in `ops` that mention it.
+    let mut ref_op_node_by_var = HashMap::new();
+
+    fn bind_node_to_var<'t>(
+        var: &'t FlowVariable<'t>,
+        nodes: &mut HashMap<&'t FlowVariable<'t>, Vec<usize>>,
+        idx: usize
+    ) {
+        let vec = nodes.entry(var).or_insert(Default::default());
+        vec.push(idx);
+    }
+
+    for (idx, op) in ops.iter().enumerate() {
+        match op {
+            Instruction(_, effect) => {
+                for var in effect.variables.iter() {
+                    bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+                }
+                for consume in effect.consumes.iter() {
+                    match consume {
+                        Unset(var) => {
+                            bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+                        }
+                        Out(var) => {
+                            bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+                        }
+                    }
+                }
+            }
+            Inc(var) => {
+                bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+            }
+            Dec(var) => {
+                bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+            }
+            ProduceUncounted(var) => {
+                bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+            }
+            Split(_, vars) => {
+                for var in vars.iter() {
+                    bind_node_to_var(unwrap_var(var), &mut ref_op_node_by_var, idx)
+                }
+            }
+        }
+    }
+
+    for key in ref_op_node_by_var.keys().sorted_by_key(|a| var_idx.var_id(a)) {
+        let v = &ref_op_node_by_var[key];
+        log::debug!("------- Operation for variable {}:", var_idx.var_id(key));
+        for insn in v.iter() {
+            print_instruction(&ops[*insn], ir, &mut var_idx)
+        }
+    }
+
+    log::debug!("============================");
+
+    for op in &ops {
+        print_instruction(op, ir, &mut var_idx)
+    }
+
+
+    //
+    //
+    // let mut operations_by_ref = HashMap::new();
+    //
+    // for (idx, instruction) in ir.iter().enumerate() {
+    //     log::trace!("inx({}): {:?}", idx, instruction);
+    //     let effect = analyze_instruction(instruction);
+    //     let pre_stack_size = data_flow_info[idx].stack_size;
+    //     let post_stack_size = data_flow_info[idx + 1].stack_size;
+    //
+    //     let variables =
+    //         data_flow_info[idx + 1].variables.iter().map(|var| (var.location.clone(), *var)).collect::<HashMap<_, _>>();
+    //
+    //     let consumes =
+    //         data_flow_info[idx + 1].consumes.iter().map(|consume| (consume.var().location.clone(), *consume)).collect::<HashMap<_, _>>();
+    //
+    //     for ref_count_op in effect.operations.iter() {
+    //         log::trace!("op: {:?}", ref_count_op);
+    //         match ref_count_op {
+    //             Inc(disposition) => {
+    //
+    //                 let location = disposition.to_df_location(pre_stack_size, post_stack_size);
+    //
+    //                 log::trace!("location is: {:?}", location);
+    //                 operations_by_ref
+    //                     .entry(FlowNodeRef::Variable(variables[&location]))
+    //                     .or_insert(vec![]).push(ref_count_op.clone())
+    //             }
+    //             Dec(disposition) => {
+    //                 let location = disposition.to_df_location(pre_stack_size, post_stack_size);
+    //
+    //                 if let Some(consume) = consumes.get(&location) {
+    //                     log::trace!("location is: {:?}", location);
+    //                     operations_by_ref
+    //                         .entry(FlowNodeRef::Consume(consume))
+    //                         .or_insert(vec![]).push(ref_count_op.clone())
+    //                 }
+    //             }
+    //         }
+    //     }
+    // }
+
+
+}
+/// Logs one `RefCountOp` at debug level, naming variables by the ids handed out by `var_idx`.
+fn print_instruction<'t>(op:&RefCountOp<'t>, ir: &Vec<DMIR>, var_idx: &mut VarIdx<'t>){
+    match op {
+        Instruction(idx, effect) => {
+            let vars = effect.variables.iter().map(|var| format!("v{}", var_idx.var_id(var))).join(", ");
+            if !effect.consumes.is_empty() {
+                let reads = effect.consumes.iter().filter(|v|
+                    matches!(v, FlowVariableConsume::Out(_))
+                ).map(|v| {
+                    match v {
+                        Out(var) => {
+                            format!("v{}", var_idx.var_id(var))
+                        }
+                        _ => panic!("unexpected")
+                    }
+                }).join(", ");
+                log::debug!("// Reads: {}", reads);
+            }
+            log::debug!("Instruction({}): {:?} -- {}", idx, ir[*idx], vars);
+        }
+        Inc(var) => {
+            log::debug!("Inc(v{})", var_idx.var_id(var))
+        }
+        Dec(var) => {
+            log::debug!("Dec(v{})", var_idx.var_id(var))
+        }
+        ProduceUncounted(var) => {
+            log::debug!("Produce(v{})", var_idx.var_id(var))
+        }
+        Split(name, vars) => {
+            let v = vars.iter().map(|var| format!("v{}", var_idx.var_id(var))).join(", ");
+            log::debug!("Split({}) -- {}", name, v);
+        }
+    }
+}
+
+/// Follows `Variable` re-assignments back to the `Phi` or `In` variable they originate from.
+fn unwrap_var<'t>(var: &'t FlowVariable<'t>) -> &'t FlowVariable<'t> {
+    match &var.expression {
+        FlowVariableExpression::Variable(other) => {
+            unwrap_var(other)
+        }
+        FlowVariableExpression::Phi(_) => {
+            var
+        }
+        FlowVariableExpression::In => { var }
+    }
+}
+/// Hands out sequential ids, one per root variable (as resolved by `unwrap_var`).
+struct VarIdx<'t> {
+    var_id: HashMap<&'t FlowVariable<'t>, usize>,
+    next: usize
+}
+
+impl <'t> VarIdx<'t> {
+    fn var_id(&mut self, var: &'t FlowVariable<'t>) -> usize {
+        let next_id = &mut self.next;
+        let v = unwrap_var(var);
+        *self.var_id.entry(v).or_insert_with(|| {
+            let id = *next_id;
+            *next_id += 1;
+            id
+        })
+    }
+}
+
+impl<'t> FlowVariableConsume<'t> {
+    fn var(&self) -> &'t FlowVariable<'t> {
+        match self {
+            Unset(var) => *var,
+            Out(var) => *var
+        }
+    }
+}
+
+#[derive(Clone, Eq, PartialEq, Hash)]
+enum FlowNodeRef<'t> {
+    Variable(&'t FlowVariable<'t>),
+    Consume(&'t FlowVariableConsume<'t>)
+}
+
+impl RefOpDisposition {
+    fn to_df_location(&self, pre_stack_size: usize, post_stack_size: usize) -> DFValueLocation {
+        match self {
+            DupPost(location) => location.to_df_location(pre_stack_size),
+            Post(location) => location.to_df_location(post_stack_size),
+            Pre(location) => location.to_df_location(pre_stack_size)
+        }
+    }
+}
+
+impl ValueLocation {
+    fn to_df_location(&self, stack_size: usize) -> DFValueLocation {
+        match self {
+            Stack(position) => DFValueLocation::Stack(stack_size as u8 - 1 - *position),
+            Cache => DFValueLocation::Cache,
+            Local(idx) => DFValueLocation::Local(*idx),
+            Argument(idx) => DFValueLocation::Argument(*idx),
+        }
+    }
+}
+
+/// One entry of the operation stream built by `analyze_instruction`.
+enum RefCountOp<'t> {
+    Instruction(usize, OperationEffect<'t>),
+    Inc(&'t FlowVariable<'t>),
+    Dec(&'t FlowVariable<'t>),
+    ProduceUncounted(&'t FlowVariable<'t>),
+    Split(String, Vec<&'t FlowVariable<'t>>),
+}
+
+/// Appends to `out` the data-flow effect of `instruction` plus the ref-count operations that accompany it.
+fn analyze_instruction<'t>(
+    ir_idx: usize,
+    instruction: &DMIR,
+    dfa: &mut DataFlowAnalyzerState<'t, '_>,
+    out: &mut Vec<RefCountOp<'t>>
+) {
+    macro_rules! instruction_body {
+        () => {
+            out.push(Instruction(ir_idx, dfa.analyze_instruction(instruction)));
+        };
+    }
+
+    match instruction {
+        DMIR::GetLocal(idx) => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        }
+        DMIR::SetLocal(idx) => {
+            if let Some(local) = dfa.state().locals.get(idx) {
+                out.push(Dec(local));
+            }
+            instruction_body!();
+        },
+        DMIR::GetSrc => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        },
+        DMIR::GetArg(_) => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        },
+        DMIR::SetArg(idx) => {
+            if let Some(argument) = dfa.state().arguments.get(*idx as usize) {
+                out.push(Dec(argument));
+            }
+            instruction_body!();
+        }
+        DMIR::FloatAdd | DMIR::FloatSub |
+        DMIR::FloatMul | DMIR::FloatDiv => {
+            let a = dfa.stack_top(0);
+            let b = dfa.stack_top(1);
+            instruction_body!();
+            out.push(Dec(a));
+            out.push(Dec(b));
+            out.push(ProduceUncounted(dfa.stack_top(0)));
+        },
+        DMIR::SetCache => {
+            if let Some(cache) = dfa.state().cache {
+                out.push(Dec(cache));
+            }
+            instruction_body!();
+        }
+        DMIR::GetCacheField(_) => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        }
+        DMIR::SetCacheField(_) => {
+            // TODO Q?
+            let value = dfa.stack_top(0);
+            instruction_body!();
+            out.push(Dec(value));
+        }
+        DMIR::PushCache => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        }
+        DMIR::PushVal(_) => {
+            instruction_body!();
+            out.push(Inc(dfa.stack_top(0)));
+        }
+
+        DMIR::CallProcById(_, _, _) | DMIR::CallProcByName(_, _, _) => {
+            let stack_top = dfa.stack_top(0);
+            instruction_body!();
+            out.push(Dec(stack_top))
+        }
+        DMIR::Pop => {
+            out.push(Dec(dfa.stack_top(0)));
+            instruction_body!();
+        }
+        DMIR::End => {
+            if let Some(cache) = &dfa.state().cache {
+                out.push(Dec(cache));
+            }
+            for (_, local) in dfa.state().locals.iter() {
+                out.push(Dec(local));
+            }
+            for arg in dfa.state().arguments.iter() {
+                out.push(Dec(arg));
+            }
+            instruction_body!();
+        }
+        DMIR::UnsetCache => {
+            if let Some(cache) = &dfa.state().cache {
+                out.push(Dec(cache));
+            }
+            instruction_body!();
+        }
+        DMIR::UnsetLocal(idx) => {
+            out.push(Dec(&dfa.state().locals[idx]));
+            instruction_body!();
+        }
+        DMIR::JZ(block) |
+        DMIR::JZInternal(block) |
+        DMIR::JNZInternal(block) => {
+            let mut vars: Vec<&FlowVariable> = vec![];
+            if let Some(cache) = &dfa.state().cache {
+                vars.push(cache);
+            }
+            for (_, local) in dfa.state().locals.iter() {
+                vars.push(local);
+            }
+            for arg in dfa.state().arguments.iter() {
+                vars.push(arg);
+            }
+            out.push(Split(block.clone(), vars));
+            instruction_body!();
+        }
+        _ => {
+            instruction_body!();
+        }
+    }
+}
diff --git a/src/variable_termination_pass.rs b/src/variable_termination_pass.rs
index 297e20a..a7822e7 100644
--- a/src/variable_termination_pass.rs
+++ b/src/variable_termination_pass.rs
@@ -363,7 +363,7 @@ impl <'t> AnalyzerState<'t> {
                     block_ended = true;
                 }
                 DMIR::ValueTagSwitch(_, cases) =>{
-                    for (_, block) in cases.as_ref() {
+                    for (_, block) in cases {
                         self.merge_presences(pos, block.clone());
                     }
                     block_ended = true;
diff --git a/tests/testData/test_ref_count.dm b/tests/testData/test_ref_count.dm
index b41e8a7..927dec4 100644
--- a/tests/testData/test_ref_count.dm
+++ b/tests/testData/test_ref_count.dm
@@ -13,7 +13,8 @@
 	compile_proc(/datum/base/proc/unbalanced_if)
 	compile_proc(/proc/moves_arg)
 	compile_proc(/proc/excess_args)
-	CHECK_INSTALL_COMPILED // RES: /receive_datum, /access_datum, /pass_datum, /store_restore_datum, /deopt_ret, /deopt_arg, /datum/base/deopt_src, /datum/base/call_nested, /datum/base/two_arg, /datum/base/unbalanced_if, /moves_arg, /excess_args
+	compile_proc(/proc/unbalanced_dup)
+	CHECK_INSTALL_COMPILED // RES: /receive_datum, /access_datum, /pass_datum, /store_restore_datum, /deopt_ret, /deopt_arg, /datum/base/deopt_src, /datum/base/call_nested, /datum/base/two_arg, /datum/base/unbalanced_if, /moves_arg, /excess_args, /unbalanced_dup
 
 	var/datum/base/dt_local = new
 	var/datum/base/dt_local_two = new
@@ -69,6 +70,8 @@
 	RES_CHECK_LEAK(dt_local) // RES: OK
 	RES_CHECK_LEAK(dt_local_two) // RES: OK
 
+	unbalanced_dup(dt_local)
+
 /datum/base
 	var/dt_next = null
 
@@ -136,4 +139,15 @@
 	return a
 
 /proc/excess_args(a)
-	return a
\ No newline at end of file
+	return a
+
+/proc/unbalanced_dup_new()
+	return new /datum/base
+/proc/unbalanced_dup_res(r)
+	RES(r)
+
+/proc/unbalanced_dup()
+	var/a = unbalanced_dup_new()
+	var/b = a
+	a = null
+	unbalanced_dup_res(dmjit_get_ref_count(b))
\ No newline at end of file