|
6 | 6 |
|
7 | 7 | use std::collections::HashMap; |
8 | 8 |
|
9 | | -use r2il::{ArchSpec, R2ILBlock}; |
| 9 | +use r2il::{ArchSpec, PointerHint, R2ILBlock, ScalarKind, StorageClass, Varnode, VarnodeMetadata}; |
10 | 10 | use serde::{Deserialize, Serialize}; |
11 | 11 |
|
12 | 12 | use crate::cfg::{CFG, CFGEdge}; |
@@ -39,6 +39,8 @@ pub struct SSAFunction { |
39 | 39 | blocks: HashMap<u64, SSABlock>, |
40 | 40 | /// Block addresses in reverse postorder. |
41 | 41 | block_order: Vec<u64>, |
| 42 | + /// Semantic metadata hints keyed by canonical SSA base variable name. |
| 43 | + semantic_var_hints: HashMap<String, VarnodeMetadata>, |
42 | 44 | } |
43 | 45 |
|
44 | 46 | /// A basic block in SSA form. |
@@ -99,6 +101,133 @@ pub struct DefRef<'a> { |
99 | 101 | pub site: DefSite, |
100 | 102 | } |
101 | 103 |
|
| 104 | +fn pointer_hint_rank(hint: PointerHint) -> u8 { |
| 105 | + match hint { |
| 106 | + PointerHint::Unknown => 0, |
| 107 | + PointerHint::PointerLike => 1, |
| 108 | + PointerHint::CodePointer => 2, |
| 109 | + } |
| 110 | +} |
| 111 | + |
| 112 | +fn scalar_kind_rank(kind: ScalarKind) -> u8 { |
| 113 | + match kind { |
| 114 | + ScalarKind::Unknown => 0, |
| 115 | + ScalarKind::Bitvector => 1, |
| 116 | + ScalarKind::Bool | ScalarKind::SignedInt | ScalarKind::UnsignedInt | ScalarKind::Float => 2, |
| 117 | + } |
| 118 | +} |
| 119 | + |
| 120 | +fn storage_class_rank(class: StorageClass) -> u8 { |
| 121 | + match class { |
| 122 | + StorageClass::Unknown => 0, |
| 123 | + StorageClass::Register => 1, |
| 124 | + StorageClass::Stack |
| 125 | + | StorageClass::Heap |
| 126 | + | StorageClass::Global |
| 127 | + | StorageClass::ThreadLocal |
| 128 | + | StorageClass::ConstData |
| 129 | + | StorageClass::Volatile => 2, |
| 130 | + } |
| 131 | +} |
| 132 | + |
/// Merges `src` into `dst`, keeping whichever value `rank` scores higher.
///
/// * `src == None` leaves `dst` untouched.
/// * An empty `dst` always takes the incoming value.
/// * Otherwise `dst` is overwritten only when the incoming value ranks
///   strictly higher; on a tie the existing value stays.
fn merge_ranked_hint<T: Copy>(dst: &mut Option<T>, src: Option<T>, rank: impl Fn(T) -> u8) {
    if let Some(incoming) = src {
        let should_replace = match *dst {
            None => true,
            Some(current) => rank(current) < rank(incoming),
        };
        if should_replace {
            *dst = Some(incoming);
        }
    }
}
| 142 | + |
| 143 | +fn merge_varnode_metadata(dst: &mut VarnodeMetadata, src: &VarnodeMetadata) { |
| 144 | + merge_ranked_hint( |
| 145 | + &mut dst.storage_class, |
| 146 | + src.storage_class, |
| 147 | + storage_class_rank, |
| 148 | + ); |
| 149 | + merge_ranked_hint(&mut dst.pointer_hint, src.pointer_hint, pointer_hint_rank); |
| 150 | + merge_ranked_hint(&mut dst.scalar_kind, src.scalar_kind, scalar_kind_rank); |
| 151 | + |
| 152 | + if dst.float_encoding.is_none() { |
| 153 | + dst.float_encoding = src.float_encoding; |
| 154 | + } |
| 155 | + if dst.endianness.is_none() { |
| 156 | + dst.endianness = src.endianness; |
| 157 | + } |
| 158 | + if dst.permissions.is_none() { |
| 159 | + dst.permissions = src.permissions; |
| 160 | + } |
| 161 | + if dst.valid_range.is_none() { |
| 162 | + dst.valid_range = src.valid_range.clone(); |
| 163 | + } |
| 164 | + if dst.bank_id.is_none() { |
| 165 | + dst.bank_id = src.bank_id.clone(); |
| 166 | + } |
| 167 | + if dst.segment_id.is_none() { |
| 168 | + dst.segment_id = src.segment_id.clone(); |
| 169 | + } |
| 170 | +} |
| 171 | + |
| 172 | +fn normalized_varnode_metadata(meta: &VarnodeMetadata) -> Option<VarnodeMetadata> { |
| 173 | + let mut out = meta.clone(); |
| 174 | + if matches!(out.storage_class, Some(StorageClass::Unknown)) { |
| 175 | + out.storage_class = None; |
| 176 | + } |
| 177 | + if matches!(out.pointer_hint, Some(PointerHint::Unknown)) { |
| 178 | + out.pointer_hint = None; |
| 179 | + } |
| 180 | + if matches!(out.scalar_kind, Some(ScalarKind::Unknown)) { |
| 181 | + out.scalar_kind = None; |
| 182 | + } |
| 183 | + |
| 184 | + let has_hint = out.storage_class.is_some() |
| 185 | + || out.pointer_hint.is_some() |
| 186 | + || out.scalar_kind.is_some() |
| 187 | + || out.float_encoding.is_some() |
| 188 | + || out.endianness.is_some() |
| 189 | + || out.permissions.is_some() |
| 190 | + || out.valid_range.is_some() |
| 191 | + || out.bank_id.is_some() |
| 192 | + || out.segment_id.is_some(); |
| 193 | + |
| 194 | + has_hint.then_some(out) |
| 195 | +} |
| 196 | + |
| 197 | +fn collect_semantic_var_hints( |
| 198 | + blocks: &[R2ILBlock], |
| 199 | + reg_names: Option<&crate::naming::RegisterNameMap>, |
| 200 | +) -> HashMap<String, VarnodeMetadata> { |
| 201 | + let mut hints: HashMap<String, VarnodeMetadata> = HashMap::new(); |
| 202 | + |
| 203 | + let mut collect_var = |vn: &Varnode| { |
| 204 | + let Some(meta) = vn.meta.as_ref() else { |
| 205 | + return; |
| 206 | + }; |
| 207 | + let Some(meta) = normalized_varnode_metadata(meta) else { |
| 208 | + return; |
| 209 | + }; |
| 210 | + let key = crate::naming::varnode_to_name(vn, reg_names).to_ascii_lowercase(); |
| 211 | + hints |
| 212 | + .entry(key) |
| 213 | + .and_modify(|existing| merge_varnode_metadata(existing, &meta)) |
| 214 | + .or_insert(meta); |
| 215 | + }; |
| 216 | + |
| 217 | + for block in blocks { |
| 218 | + for op in &block.ops { |
| 219 | + if let Some(dst) = op.output() { |
| 220 | + collect_var(dst); |
| 221 | + } |
| 222 | + for src in op.inputs() { |
| 223 | + collect_var(src); |
| 224 | + } |
| 225 | + } |
| 226 | + } |
| 227 | + |
| 228 | + hints |
| 229 | +} |
| 230 | + |
102 | 231 | impl SSAFunction { |
103 | 232 | /// Build an SSA function from a sequence of r2il blocks. |
104 | 233 | pub fn from_blocks(blocks: &[R2ILBlock]) -> Option<Self> { |
@@ -144,6 +273,7 @@ impl SSAFunction { |
144 | 273 |
|
145 | 274 | let reg_names = arch.map(build_register_name_map); |
146 | 275 | let reg_names_ref = reg_names.as_ref(); |
| 276 | + let semantic_var_hints = collect_semantic_var_hints(blocks, reg_names_ref); |
147 | 277 |
|
148 | 278 | // Collect variable definitions and sizes |
149 | 279 | let (defs, var_sizes) = collect_defs_from_cfg_with_names(&cfg, reg_names_ref); |
@@ -203,6 +333,7 @@ impl SSAFunction { |
203 | 333 | domtree, |
204 | 334 | blocks: ssa_blocks, |
205 | 335 | block_order: renamed.block_order, |
| 336 | + semantic_var_hints, |
206 | 337 | }) |
207 | 338 | } |
208 | 339 |
|
@@ -244,6 +375,16 @@ impl SSAFunction { |
244 | 375 | &self.block_order |
245 | 376 | } |
246 | 377 |
|
| 378 | + /// Look up semantic metadata hints for an SSA variable. |
| 379 | + pub fn semantic_var_metadata(&self, var: &SSAVar) -> Option<&VarnodeMetadata> { |
| 380 | + self.semantic_var_metadata_by_name(&var.name) |
| 381 | + } |
| 382 | + |
| 383 | + /// Look up semantic metadata hints by canonical SSA base variable name. |
| 384 | + pub fn semantic_var_metadata_by_name(&self, name: &str) -> Option<&VarnodeMetadata> { |
| 385 | + self.semantic_var_hints.get(&name.to_ascii_lowercase()) |
| 386 | + } |
| 387 | + |
247 | 388 | /// Get the number of blocks. |
248 | 389 | pub fn num_blocks(&self) -> usize { |
249 | 390 | self.blocks.len() |
@@ -629,7 +770,7 @@ impl SSABlock { |
629 | 770 | #[cfg(test)] |
630 | 771 | mod tests { |
631 | 772 | use super::*; |
632 | | - use r2il::{R2ILOp, SpaceId, Varnode}; |
| 773 | + use r2il::{PointerHint, R2ILOp, SpaceId, Varnode, VarnodeMetadata}; |
633 | 774 |
|
634 | 775 | fn make_const(val: u64, size: u32) -> Varnode { |
635 | 776 | Varnode { |
@@ -926,6 +1067,33 @@ mod tests { |
926 | 1067 | assert_eq!(func.idom(0x1008), Some(0x1000)); |
927 | 1068 | } |
928 | 1069 |
|
/// A pointer hint attached to the source operand of a `Copy` must be
/// retrievable from the built SSA function under that operand's name.
#[test]
fn test_semantic_var_metadata_is_collected_from_source_blocks() {
    // Only the copy's source register carries metadata.
    let mut hinted_src = make_reg(0x10, 8);
    let pointer_like = VarnodeMetadata {
        pointer_hint: Some(PointerHint::PointerLike),
        ..Default::default()
    };
    hinted_src.set_meta(pointer_like);

    let copy_op = R2ILOp::Copy {
        dst: make_reg(0, 8),
        src: hinted_src,
    };
    let only_block = R2ILBlock {
        addr: 0x1000,
        size: 4,
        ops: vec![copy_op],
        switch_info: None,
        op_metadata: Default::default(),
    };
    let blocks = vec![only_block];

    let func = SSAFunction::from_blocks_raw_no_arch(&blocks).expect("raw SSA should build");
    let lookup = func.semantic_var_metadata_by_name("reg:10");
    let meta = lookup.expect("expected semantic metadata for source register");

    assert_eq!(meta.pointer_hint, Some(PointerHint::PointerLike));
}
| 1096 | + |
929 | 1097 | #[test] |
930 | 1098 | fn test_for_each_source_reports_phi_and_op_sites() { |
931 | 1099 | let blocks = vec![ |
|
0 commit comments