Skip to content

Commit d4db23a

Browse files
committed
feat(r2ssa,r2dec): propagate semantic var hints into decompiler typing
1 parent f152ac1 commit d4db23a

2 files changed

Lines changed: 264 additions & 3 deletions

File tree

crates/r2dec/src/types.rs

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
66
use std::collections::{HashMap, HashSet};
77

8+
use r2il::{PointerHint, ScalarKind};
89
use r2ssa::{SSAFunction, SSAOp, SSAVar};
910
use r2types::{
1011
CTypeLike, Constraint, ConstraintSource, ExternalTypeDb, MemoryCapability, ResolvedSignature,
@@ -119,6 +120,7 @@ impl TypeInference {
119120
let deref_consumers = collect_deref_consumers(func, &defs);
120121
let mut struct_hints: HashMap<SSAVar, String> = HashMap::new();
121122

123+
self.emit_semantic_hint_constraints(func, &mut arena, &mut constraints);
122124
self.emit_inferred_constraints(
123125
func,
124126
&defs,
@@ -148,6 +150,65 @@ impl TypeInference {
148150
self.solved_types = Some(solved);
149151
}
150152

153+
fn emit_semantic_hint_constraints(
154+
&self,
155+
func: &SSAFunction,
156+
arena: &mut TypeArena,
157+
constraints: &mut Vec<Constraint>,
158+
) {
159+
for var in collect_vars(func) {
160+
if var.is_const() {
161+
continue;
162+
}
163+
let Some(meta) = func.semantic_var_metadata(&var) else {
164+
continue;
165+
};
166+
167+
if let Some(pointer_hint) = meta.pointer_hint
168+
&& !matches!(pointer_hint, PointerHint::Unknown)
169+
{
170+
let pointee = self.integer_type_id(1, Signedness::Unknown, arena);
171+
constraints.push(Constraint::SetType {
172+
var: var.clone(),
173+
ty: arena.ptr(pointee),
174+
source: ConstraintSource::External,
175+
});
176+
continue;
177+
}
178+
179+
let Some(kind) = meta.scalar_kind else {
180+
continue;
181+
};
182+
let Some(ty) = self.semantic_scalar_kind_type(kind, var.size, arena) else {
183+
continue;
184+
};
185+
constraints.push(Constraint::SetType {
186+
var: var.clone(),
187+
ty,
188+
source: ConstraintSource::External,
189+
});
190+
}
191+
}
192+
193+
fn semantic_scalar_kind_type(
194+
&self,
195+
kind: ScalarKind,
196+
size: u32,
197+
arena: &mut TypeArena,
198+
) -> Option<TypeId> {
199+
match kind {
200+
ScalarKind::Bool => Some(arena.bool_ty()),
201+
ScalarKind::Float if size > 0 => Some(arena.float(size.saturating_mul(8))),
202+
ScalarKind::SignedInt => Some(self.integer_type_id(size, Signedness::Signed, arena)),
203+
ScalarKind::UnsignedInt => {
204+
Some(self.integer_type_id(size, Signedness::Unsigned, arena))
205+
}
206+
ScalarKind::Bitvector => Some(self.integer_type_id(size, Signedness::Unknown, arena)),
207+
ScalarKind::Unknown => None,
208+
ScalarKind::Float => None,
209+
}
210+
}
211+
151212
fn emit_inferred_constraints(
152213
&self,
153214
func: &SSAFunction,
@@ -1370,7 +1431,9 @@ fn struct_name_from_type(arena: &TypeArena, ty: TypeId) -> Option<&str> {
13701431
#[cfg(test)]
13711432
mod tests {
13721433
use super::*;
1373-
use r2il::{ArchSpec, R2ILBlock, R2ILOp, RegisterDef, SpaceId, Varnode};
1434+
use r2il::{
1435+
ArchSpec, PointerHint, R2ILBlock, R2ILOp, RegisterDef, SpaceId, Varnode, VarnodeMetadata,
1436+
};
13741437
use r2types::Type;
13751438

13761439
fn ssa_from_ops(ops: Vec<R2ILOp>, arch: Option<&ArchSpec>) -> SSAFunction {
@@ -1437,6 +1500,36 @@ mod tests {
14371500
assert_eq!(parse_const_addr("RAX_1"), None);
14381501
}
14391502

1503+
#[test]
fn test_semantic_pointer_hint_promotes_register_to_pointer_type() {
    // Source register carrying an explicit pointer-like hint.
    let pointer_meta = VarnodeMetadata {
        pointer_hint: Some(PointerHint::PointerLike),
        ..Default::default()
    };
    let mut src = Varnode::register(0x10, 8);
    src.set_meta(pointer_meta);

    let copy_op = R2ILOp::Copy {
        dst: Varnode::unique(0x10, 8),
        src,
    };
    let func = ssa_from_ops(vec![copy_op], None);

    let mut inference = TypeInference::new(64);
    inference.infer_function(&func);

    // Version 0 of the hinted register is the value read by the copy.
    let hinted_var = func
        .used_vars()
        .into_iter()
        .find(|candidate| candidate.version == 0 && candidate.name == "reg:10")
        .expect("expected source register SSA var");

    let ty = inference.get_type(&hinted_var);
    assert!(
        ty.is_pointer(),
        "semantic pointer hint should yield pointer type, got {ty}"
    );
}
1532+
14401533
#[test]
14411534
fn test_emit_inferred_constraints_copy_emits_equal() {
14421535
let ti = TypeInference::new(64);

crates/r2ssa/src/function.rs

Lines changed: 170 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
77
use std::collections::HashMap;
88

9-
use r2il::{ArchSpec, R2ILBlock};
9+
use r2il::{ArchSpec, PointerHint, R2ILBlock, ScalarKind, StorageClass, Varnode, VarnodeMetadata};
1010
use serde::{Deserialize, Serialize};
1111

1212
use crate::cfg::{CFG, CFGEdge};
@@ -39,6 +39,8 @@ pub struct SSAFunction {
3939
blocks: HashMap<u64, SSABlock>,
4040
/// Block addresses in reverse postorder.
4141
block_order: Vec<u64>,
42+
/// Semantic metadata hints keyed by canonical SSA base variable name.
43+
semantic_var_hints: HashMap<String, VarnodeMetadata>,
4244
}
4345

4446
/// A basic block in SSA form.
@@ -99,6 +101,133 @@ pub struct DefRef<'a> {
99101
pub site: DefSite,
100102
}
101103

104+
fn pointer_hint_rank(hint: PointerHint) -> u8 {
105+
match hint {
106+
PointerHint::Unknown => 0,
107+
PointerHint::PointerLike => 1,
108+
PointerHint::CodePointer => 2,
109+
}
110+
}
111+
112+
fn scalar_kind_rank(kind: ScalarKind) -> u8 {
113+
match kind {
114+
ScalarKind::Unknown => 0,
115+
ScalarKind::Bitvector => 1,
116+
ScalarKind::Bool | ScalarKind::SignedInt | ScalarKind::UnsignedInt | ScalarKind::Float => 2,
117+
}
118+
}
119+
120+
fn storage_class_rank(class: StorageClass) -> u8 {
121+
match class {
122+
StorageClass::Unknown => 0,
123+
StorageClass::Register => 1,
124+
StorageClass::Stack
125+
| StorageClass::Heap
126+
| StorageClass::Global
127+
| StorageClass::ThreadLocal
128+
| StorageClass::ConstData
129+
| StorageClass::Volatile => 2,
130+
}
131+
}
132+
133+
/// Merges `src` into `dst`, keeping whichever hint has the higher `rank`.
///
/// A `None` source leaves `dst` untouched. An empty `dst` always takes the
/// source. On equal rank the existing `dst` value is kept.
fn merge_ranked_hint<T: Copy>(dst: &mut Option<T>, src: Option<T>, rank: impl Fn(T) -> u8) {
    if let Some(incoming) = src {
        // The existing value survives only if it ranks at least as high.
        let keep_existing = (*dst).is_some_and(|current| rank(current) >= rank(incoming));
        if !keep_existing {
            *dst = Some(incoming);
        }
    }
}
142+
143+
fn merge_varnode_metadata(dst: &mut VarnodeMetadata, src: &VarnodeMetadata) {
144+
merge_ranked_hint(
145+
&mut dst.storage_class,
146+
src.storage_class,
147+
storage_class_rank,
148+
);
149+
merge_ranked_hint(&mut dst.pointer_hint, src.pointer_hint, pointer_hint_rank);
150+
merge_ranked_hint(&mut dst.scalar_kind, src.scalar_kind, scalar_kind_rank);
151+
152+
if dst.float_encoding.is_none() {
153+
dst.float_encoding = src.float_encoding;
154+
}
155+
if dst.endianness.is_none() {
156+
dst.endianness = src.endianness;
157+
}
158+
if dst.permissions.is_none() {
159+
dst.permissions = src.permissions;
160+
}
161+
if dst.valid_range.is_none() {
162+
dst.valid_range = src.valid_range.clone();
163+
}
164+
if dst.bank_id.is_none() {
165+
dst.bank_id = src.bank_id.clone();
166+
}
167+
if dst.segment_id.is_none() {
168+
dst.segment_id = src.segment_id.clone();
169+
}
170+
}
171+
172+
fn normalized_varnode_metadata(meta: &VarnodeMetadata) -> Option<VarnodeMetadata> {
173+
let mut out = meta.clone();
174+
if matches!(out.storage_class, Some(StorageClass::Unknown)) {
175+
out.storage_class = None;
176+
}
177+
if matches!(out.pointer_hint, Some(PointerHint::Unknown)) {
178+
out.pointer_hint = None;
179+
}
180+
if matches!(out.scalar_kind, Some(ScalarKind::Unknown)) {
181+
out.scalar_kind = None;
182+
}
183+
184+
let has_hint = out.storage_class.is_some()
185+
|| out.pointer_hint.is_some()
186+
|| out.scalar_kind.is_some()
187+
|| out.float_encoding.is_some()
188+
|| out.endianness.is_some()
189+
|| out.permissions.is_some()
190+
|| out.valid_range.is_some()
191+
|| out.bank_id.is_some()
192+
|| out.segment_id.is_some();
193+
194+
has_hint.then_some(out)
195+
}
196+
197+
fn collect_semantic_var_hints(
198+
blocks: &[R2ILBlock],
199+
reg_names: Option<&crate::naming::RegisterNameMap>,
200+
) -> HashMap<String, VarnodeMetadata> {
201+
let mut hints: HashMap<String, VarnodeMetadata> = HashMap::new();
202+
203+
let mut collect_var = |vn: &Varnode| {
204+
let Some(meta) = vn.meta.as_ref() else {
205+
return;
206+
};
207+
let Some(meta) = normalized_varnode_metadata(meta) else {
208+
return;
209+
};
210+
let key = crate::naming::varnode_to_name(vn, reg_names).to_ascii_lowercase();
211+
hints
212+
.entry(key)
213+
.and_modify(|existing| merge_varnode_metadata(existing, &meta))
214+
.or_insert(meta);
215+
};
216+
217+
for block in blocks {
218+
for op in &block.ops {
219+
if let Some(dst) = op.output() {
220+
collect_var(dst);
221+
}
222+
for src in op.inputs() {
223+
collect_var(src);
224+
}
225+
}
226+
}
227+
228+
hints
229+
}
230+
102231
impl SSAFunction {
103232
/// Build an SSA function from a sequence of r2il blocks.
104233
pub fn from_blocks(blocks: &[R2ILBlock]) -> Option<Self> {
@@ -144,6 +273,7 @@ impl SSAFunction {
144273

145274
let reg_names = arch.map(build_register_name_map);
146275
let reg_names_ref = reg_names.as_ref();
276+
let semantic_var_hints = collect_semantic_var_hints(blocks, reg_names_ref);
147277

148278
// Collect variable definitions and sizes
149279
let (defs, var_sizes) = collect_defs_from_cfg_with_names(&cfg, reg_names_ref);
@@ -203,6 +333,7 @@ impl SSAFunction {
203333
domtree,
204334
blocks: ssa_blocks,
205335
block_order: renamed.block_order,
336+
semantic_var_hints,
206337
})
207338
}
208339

@@ -244,6 +375,16 @@ impl SSAFunction {
244375
&self.block_order
245376
}
246377

378+
/// Look up semantic metadata hints for an SSA variable.
379+
pub fn semantic_var_metadata(&self, var: &SSAVar) -> Option<&VarnodeMetadata> {
380+
self.semantic_var_metadata_by_name(&var.name)
381+
}
382+
383+
/// Look up semantic metadata hints by canonical SSA base variable name.
384+
pub fn semantic_var_metadata_by_name(&self, name: &str) -> Option<&VarnodeMetadata> {
385+
self.semantic_var_hints.get(&name.to_ascii_lowercase())
386+
}
387+
247388
/// Get the number of blocks.
248389
pub fn num_blocks(&self) -> usize {
249390
self.blocks.len()
@@ -629,7 +770,7 @@ impl SSABlock {
629770
#[cfg(test)]
630771
mod tests {
631772
use super::*;
632-
use r2il::{R2ILOp, SpaceId, Varnode};
773+
use r2il::{PointerHint, R2ILOp, SpaceId, Varnode, VarnodeMetadata};
633774

634775
fn make_const(val: u64, size: u32) -> Varnode {
635776
Varnode {
@@ -926,6 +1067,33 @@ mod tests {
9261067
assert_eq!(func.idom(0x1008), Some(0x1000));
9271068
}
9281069

1070+
#[test]
fn test_semantic_var_metadata_is_collected_from_source_blocks() {
    // Attach a pointer-like hint to the register read by the copy.
    let pointer_meta = VarnodeMetadata {
        pointer_hint: Some(PointerHint::PointerLike),
        ..Default::default()
    };
    let mut hinted_src = make_reg(0x10, 8);
    hinted_src.set_meta(pointer_meta);

    let copy_op = R2ILOp::Copy {
        dst: make_reg(0, 8),
        src: hinted_src,
    };
    let block = R2ILBlock {
        addr: 0x1000,
        size: 4,
        ops: vec![copy_op],
        switch_info: None,
        op_metadata: Default::default(),
    };
    let blocks = vec![block];

    let func = SSAFunction::from_blocks_raw_no_arch(&blocks).expect("raw SSA should build");

    let collected = func
        .semantic_var_metadata_by_name("reg:10")
        .expect("expected semantic metadata for source register");
    assert_eq!(collected.pointer_hint, Some(PointerHint::PointerLike));
}
1096+
9291097
#[test]
9301098
fn test_for_each_source_reports_phi_and_op_sites() {
9311099
let blocks = vec![

0 commit comments

Comments
 (0)