Skip to content

Commit d74bf22

Browse files
committed
refactor(architecture/json): auto-generate human-readable instruction syntax
1 parent 496bbb5 commit d74bf22

4 files changed

Lines changed: 66 additions & 16 deletions

File tree

src/architecture.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,6 @@ pub struct InstructionSyntax<'a, BitRange> {
280280
pub parser: crate::parser::Instruction,
281281
/// Translated instruction's syntax
282282
pub output_syntax: &'a str,
283-
/// User representation of the instruction's syntax
284-
pub user_syntax: String,
285283
/// Parameters of the instruction
286284
pub fields: Vec<InstructionField<'a, BitRange>>,
287285
}

src/architecture/json.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,6 @@ pub struct InstructionSyntax<'a, BitRange> {
143143
/// Syntax specification of the instruction. `[fF]\d+` (e.g. `F2`) is interpreted as the field whose
144144
/// index in `fields` is the number given. Other characters are interpreted literally. Ex: `F0 F3 F1 (F2)`
145145
pub signature_definition: &'a str,
146-
/// Same as `signature`, but replacing `[fF]\d+` with the field names
147-
#[serde(rename = "signatureRaw")]
148-
pub signature_raw: &'a str,
149146
/// Parameters of the instruction
150147
pub fields: Vec<super::InstructionField<'a, BitRange>>,
151148
}
@@ -159,7 +156,6 @@ impl<'a, T> TryFrom<InstructionSyntax<'a, T>> for super::InstructionSyntax<'a, T
159156
Ok(Self {
160157
parser,
161158
output_syntax: value.signature_definition,
162-
user_syntax: value.signature_raw,
163159
fields: value.fields,
164160
})
165161
}

src/compiler.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,14 +272,14 @@ fn parse_instruction<'a>(
272272
// Otherwise, store it in case this is the only matching definition
273273
possible_def = Some((inst, parsed_args));
274274
}
275-
Err(e) => errs.push((inst.syntax.user_syntax.to_string(), e)),
275+
Err(e) => errs.push((inst.syntax.parser.syntax().to_string(), e)),
276276
}
277277
}
278278
for inst in arch.find_pseudoinstructions(name.0) {
279279
match inst.syntax.parser.parse(args) {
280280
// If parsing is successful, assume this definition is the correct one and return it
281281
Ok(parsed_args) => return Ok((InstructionDefinition::Pseudo(inst), parsed_args)),
282-
Err(e) => errs.push((inst.syntax.user_syntax.to_string(), e)),
282+
Err(e) => errs.push((inst.syntax.parser.syntax().to_string(), e)),
283283
}
284284
}
285285
// None of the definitions matched perfectly. If there is a matching definition that failed due

src/parser/instruction.rs

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
use chumsky::{input::MappedInput, prelude::*};
2626
use regex::Regex;
2727

28+
use std::fmt::Write;
2829
use std::sync::LazyLock;
2930

3031
use super::{expression, expression::Expr, lexer, ParseError, Span, Spanned, Token};
@@ -56,7 +57,12 @@ type BoxedParser<'src> = super::Parser!(boxed: 'src, TokenInput<'src>, ParsedArg
5657

5758
/// Instruction parser wrapper
5859
#[derive(Clone)]
59-
pub struct Instruction(BoxedParser<'static>);
60+
pub struct Instruction {
61+
/// Parser for the syntax
62+
parser: BoxedParser<'static>,
63+
/// Human-readable syntax
64+
syntax: String,
65+
}
6066

6167
/// Instruction statement AST node with references to data
6268
pub type InstructionNodeRef<'src> = (Spanned<&'src str>, Spanned<&'src [Spanned<Token>]>);
@@ -87,9 +93,11 @@ impl Instruction {
8793
/// # Errors
8894
///
8995
/// Errors if the syntax specification is invalid
96+
#[allow(clippy::missing_panics_doc)] // Function should never panic
9097
pub fn build<T>(fmt: &str, fields: &[InstructionField<T>]) -> Result<Self, &'static str> {
9198
// Regex for an instruction argument placeholder
9299
static FIELD: LazyLock<Regex> = crate::regex!(r"^[fF][0-9]+$");
100+
static WRITE_EXPECT: &str = "Writing to an in-memory vector can't fail";
93101

94102
// Gets the field number the placeholder points to and validates that it has a correct type
95103
let field = |ident: String, no_co: bool| -> Result<usize, _> {
@@ -117,6 +125,7 @@ impl Instruction {
117125

118126
// Creates an initial dummy parser that consumes no input
119127
let parser = any().ignored().or(end()).rewind();
128+
let mut syntax = String::with_capacity(fmt.len());
120129
// Validate the first token is a field placeholder pointing to the opcode/instruction name
121130
let mut parser = parser
122131
.to(match tokens.next() {
@@ -125,6 +134,7 @@ impl Instruction {
125134
let i = field(ident, false)?;
126135
match fields[i].r#type {
127136
FieldType::Co => {
137+
write!(syntax, "{} ", fields[i].name).expect(WRITE_EXPECT);
128138
// NOTE: This value should never be read; we only need it to point to the
129139
// opcode field of the instruction
130140
vec![ParsedArgument {
@@ -144,12 +154,18 @@ impl Instruction {
144154
.boxed();
145155

146156
// Iterate through the remaining tokens
157+
let mut prev_symbol = true;
147158
for token in tokens {
148159
// Append the current token parser to the parser being created
149160
parser = match token {
150161
// The current token is an argument placeholder => parse an expression/identifier
151162
Token::Identifier(ident) if FIELD.is_match(&ident) => {
152163
let field_idx = field(ident, true)?; // Validate the field pointed to
164+
if !prev_symbol {
165+
syntax.push(' ');
166+
}
167+
write!(syntax, "{}", fields[field_idx].name).expect(WRITE_EXPECT);
168+
prev_symbol = false;
153169
parser
154170
.then(expression::parser())
155171
.map(move |(mut args, value)| {
@@ -160,11 +176,36 @@ impl Instruction {
160176
}
161177
// The current token isn't an argument placeholder => parse it literally, ignoring
162178
// its output
163-
_ => parser.then_ignore(just(token)).boxed(),
179+
_ => {
180+
let symbol = matches!(
181+
token,
182+
Token::Operator(_) | Token::Ctrl(_) | Token::Literal(_)
183+
);
184+
185+
if !prev_symbol && !symbol {
186+
syntax.push(' ');
187+
}
188+
match &token {
189+
Token::Integer(n) => write!(syntax, "{n}"),
190+
Token::Float(x) => write!(syntax, "{}", f64::from(*x)),
191+
Token::String(s) => write!(syntax, "\"{s}\""),
192+
Token::Character(c) => write!(syntax, "\'{c}\'"),
193+
Token::Identifier(i) => write!(syntax, "{i}"),
194+
Token::Label(l) => write!(syntax, "{l}:"),
195+
Token::Directive(d) => write!(syntax, "{d}"),
196+
Token::Operator(c) => write!(syntax, "{c}"),
197+
Token::Ctrl(',') => write!(syntax, ", "),
198+
Token::Ctrl(c) | Token::Literal(c) => write!(syntax, "{c}"),
199+
}
200+
.expect(WRITE_EXPECT);
201+
prev_symbol = symbol;
202+
parser.then_ignore(just(token)).boxed()
203+
}
164204
}
165205
}
206+
syntax.truncate(syntax.trim_end().len());
166207
// Check that there is no remaining input in the syntax and create the final parser
167-
Ok(Self(parser))
208+
Ok(Self { parser, syntax })
168209
}
169210

170211
/// Parses the arguments of an instruction according to the syntax
@@ -193,7 +234,7 @@ impl Instruction {
193234
// SAFETY: This is safe because the stored parser has a lifetime of `'static`, so we will
194235
// only ever reduce its lifetime. Since lifetimes are removed during monomorphisation, the
195236
// parser must be valid for arbitrary lifetimes.
196-
unsafe { &*(&raw const self.0).cast() }
237+
unsafe { &*(&raw const self.parser).cast() }
197238
}
198239

199240
/// Lexes an instruction represented as a string
@@ -236,12 +277,20 @@ impl Instruction {
236277
let input = tokens.map(end, |(x, s)| (x, s));
237278
Ok(parser.parse(input).into_result()?)
238279
}
280+
281+
/// Returns a human-readable representation of the syntax
282+
#[must_use]
283+
pub fn syntax(&self) -> &str {
284+
&self.syntax
285+
}
239286
}
240287

241288
// Boxed parsers don't implement `Debug`, so we need to implement it manually as an opaque box
242289
impl std::fmt::Debug for Instruction {
243290
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244-
f.debug_tuple("InstructionParser").finish()
291+
f.debug_tuple("InstructionParser")
292+
.field(&self.syntax)
293+
.finish()
245294
}
246295
}
247296

@@ -254,16 +303,16 @@ mod test {
254303

255304
#[must_use]
256305
fn fields() -> [InstructionField<'static, ()>; 3] {
257-
let field = |co| InstructionField {
258-
name: "",
306+
let field = |co, name| InstructionField {
307+
name,
259308
r#type: if co {
260309
FieldType::Co
261310
} else {
262311
FieldType::ImmSigned
263312
},
264313
range: (),
265314
};
266-
[field(true), field(false), field(false)]
315+
[field(true, "name"), field(false, "a"), field(false, "b")]
267316
}
268317

269318
fn parse(parser: &Instruction, src: &str) -> Result<ParsedArgs, ()> {
@@ -300,13 +349,15 @@ mod test {
300349
#[test]
301350
fn no_args() {
302351
let parser = Instruction::build("F0", &fields()).unwrap();
352+
assert_eq!(parser.syntax(), "name");
303353
assert_eq!(parse(&parser, ""), Ok(vec![co_arg()]));
304354
assert_eq!(parse(&parser, "a"), Err(()));
305355
}
306356

307357
#[test]
308358
fn one_arg() {
309359
let parser = Instruction::build("F0 F1", &fields()).unwrap();
360+
assert_eq!(parser.syntax(), "name a");
310361
assert_eq!(parse(&parser, ""), Err(()));
311362
assert_eq!(parse(&parser, ","), Err(()));
312363
assert_eq!(parse(&parser, "$"), Err(()));
@@ -357,6 +408,7 @@ mod test {
357408
#[test]
358409
fn multiple_arg() {
359410
let parser = Instruction::build("F0 F2 F1", &fields()).unwrap();
411+
assert_eq!(parser.syntax(), "name b a");
360412
assert_eq!(parse(&parser, ""), Err(()));
361413
assert_eq!(parse(&parser, ","), Err(()));
362414
assert_eq!(parse(&parser, "a"), Err(()));
@@ -382,6 +434,7 @@ mod test {
382434
#[test]
383435
fn comma_separator() {
384436
let parser = Instruction::build("F0 F1, F2", &fields()).unwrap();
437+
assert_eq!(parser.syntax(), "name a, b");
385438
assert_eq!(parse(&parser, "1 2"), Err(()));
386439
assert_eq!(
387440
parse(&parser, "1, 2"),
@@ -396,6 +449,7 @@ mod test {
396449
#[test]
397450
fn literals() {
398451
let parser = Instruction::build("F0 ,1 F1 $(F2)", &fields()).unwrap();
452+
assert_eq!(parser.syntax(), "name , 1 a$(b)");
399453
assert_eq!(parse(&parser, "2 5"), Err(()));
400454
assert_eq!(parse(&parser, ",1 2 5"), Err(()));
401455
assert_eq!(parse(&parser, ",1 2 (5)"), Err(()));
@@ -410,6 +464,7 @@ mod test {
410464
])
411465
);
412466
let parser = Instruction::build("F0 1 * -F1", &fields()).unwrap();
467+
assert_eq!(parser.syntax(), "name 1*-a");
413468
assert_eq!(parse(&parser, "2"), Err(()));
414469
assert_eq!(parse(&parser, "-2"), Err(()));
415470
assert_eq!(parse(&parser, "* -2"), Err(()));
@@ -420,6 +475,7 @@ mod test {
420475
Ok(vec![co_arg(), arg((number(2), 5..6), 1)])
421476
);
422477
let parser = Instruction::build("F0 aF1 F1a F2", &fields()).unwrap();
478+
assert_eq!(parser.syntax(), "name aF1 F1a b");
423479
assert_eq!(parse(&parser, "1 1 2"), Err(()));
424480
assert_eq!(parse(&parser, "a1 1a 2"), Err(()));
425481
assert_eq!(parse(&parser, "aF1 f1a 2"), Err(()));

0 commit comments

Comments
 (0)