Skip to content

Commit

Permalink
asm: fix sign-extended immediates (#10216)
Browse files Browse the repository at this point in the history
* asm: replace `emit_simm` with `Imm*::encode`

* asm: expand fuzz doc comment

* asm: pretty print sign-extended immediates

As mentioned in #10200, `capstone` has a peculiar way of pretty-printing
immediates, especially signed immediates. It is simpler (and perhaps
more clear) for us to just print immediates in one consistent format:
`0xffff...`, e.g. This change parses capstone's pretty-printed
immediates and converts them to our simpler format if the first attempt
to match this assembler's output with `capston` fails.

* asm: add types for sign-extended immediates

As pointed out in #10200, it could be confusing for users for
`cranelift-assembler-x64` to pass unsigned integers to certain assembler
instructions and have them unexpectedly sign-extended. Well, it can't be
too surprising since these instructions have a `_sx*` suffix, but this
change implements @alexcrichton's additional suggestion to create
separate types for the immediates that may be sign-extended.

These new types (`Simm8`, `Simm16`, `Simm32`) are quite similar to their
vanilla counterparts (`Imm8`, `Imm16`, `Imm32`) but have additional
sign-extension logic when pretty-printed. This means the vanilla
versions can be simplified and the pre-existing `Simm32` is renamed to
the more appropriate `AmodeOffset`.

* clippy: fix casts, find pattern
  • Loading branch information
abrown authored Feb 13, 2025
1 parent df0a5ba commit b0b5d8f
Show file tree
Hide file tree
Showing 21 changed files with 392 additions and 175 deletions.
1 change: 1 addition & 0 deletions cranelift/assembler-x64/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pedantic = "warn"
module_name_repetitions = { level = "allow", priority = 1 }
similar_names = { level = "allow", priority = 1 }
wildcard_imports = { level = "allow", priority = 1 }
too_many_lines = { level = "allow", priority = 1 }

[features]
fuzz = ['dep:arbitrary', 'dep:capstone']
10 changes: 8 additions & 2 deletions cranelift/assembler-x64/meta/src/dsl/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,14 @@ pub enum Extension {
SignExtendQuad,
SignExtendLong,
SignExtendWord,
ZeroExtend,
}

impl Extension {
/// Check if the extension is sign-extended.
#[must_use]
pub fn is_sign_extended(&self) -> bool {
matches!(self, Self::SignExtendQuad | Self::SignExtendLong | Self::SignExtendWord)
}
}

impl Default for Extension {
Expand All @@ -365,7 +372,6 @@ impl core::fmt::Display for Extension {
Extension::SignExtendQuad => write!(f, "sxq"),
Extension::SignExtendLong => write!(f, "sxl"),
Extension::SignExtendWord => write!(f, "sxw"),
Extension::ZeroExtend => write!(f, "zx"),
}
}
}
3 changes: 3 additions & 0 deletions cranelift/assembler-x64/meta/src/generate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,11 @@ pub fn isle_macro(f: &mut Formatter, insts: &[dsl::Inst]) {
/// above.
pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) {
f.line("(type AssemblerImm8 extern (enum))", None);
f.line("(type AssemblerSimm8 extern (enum))", None);
f.line("(type AssemblerImm16 extern (enum))", None);
f.line("(type AssemblerSimm16 extern (enum))", None);
f.line("(type AssemblerImm32 extern (enum))", None);
f.line("(type AssemblerSimm32 extern (enum))", None);
f.line("(type AssemblerReadGpr extern (enum))", None);
f.line("(type AssemblerReadWriteGpr extern (enum))", None);
f.line("(type AssemblerReadGprMem extern (enum))", None);
Expand Down
8 changes: 1 addition & 7 deletions cranelift/assembler-x64/meta/src/generate/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,7 @@ impl dsl::Format {
[_, Imm(imm)] => {
f.empty_line();
f.comment("Emit immediate.");
fmtln!(f, "let bytes = {};", imm.bytes());
if imm.bits() == 32 {
fmtln!(f, "let value = self.{imm}.value();");
} else {
fmtln!(f, "let value = u32::from(self.{imm}.value());");
};
fmtln!(f, "emit_simm(buf, bytes, value);");
fmtln!(f, "self.{imm}.encode(buf);");
}
unknown => {
// Do nothing: no immediates expected.
Expand Down
9 changes: 8 additions & 1 deletion cranelift/assembler-x64/meta/src/generate/inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,14 @@ impl dsl::Inst {
.iter()
.filter_map(|o| match o.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("AssemblerImm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if o.extension.is_sign_extended() {
Some(format!("AssemblerSimm{bits}"))
} else {
Some(format!("AssemblerImm{bits}"))
}
}
Reg(_) => Some(format!("Assembler{}Gpr", o.mutability.generate_type())),
RegMem(_) => Some(format!("Assembler{}GprMem", o.mutability.generate_type())),
})
Expand Down
27 changes: 22 additions & 5 deletions cranelift/assembler-x64/meta/src/generate/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ impl dsl::Operand {
use dsl::OperandKind::*;
match self.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("Imm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if self.extension.is_sign_extended() {
Some(format!("Simm{bits}"))
} else {
Some(format!("Imm{bits}"))
}
}
Reg(_) => Some(format!("Gpr<R::{}Gpr>", self.mutability.generate_type())),
RegMem(_) => Some(format!("GprMem<R::{}Gpr, R::ReadGpr>", self.mutability.generate_type())),
}
Expand All @@ -22,7 +29,14 @@ impl dsl::Operand {
};
match self.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("Imm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if self.extension.is_sign_extended() {
Some(format!("Simm{bits}"))
} else {
Some(format!("Imm{bits}"))
}
}
Reg(_) => Some(format!("Gpr<{pick_ty}>")),
RegMem(_) => Some(format!("GprMem<{pick_ty}, {read_ty}>")),
}
Expand Down Expand Up @@ -58,8 +72,12 @@ impl dsl::Location {
eax => "\"%eax\"".into(),
rax => "\"%rax\"".into(),
imm8 | imm16 | imm32 => {
let variant = extension.generate_variant();
format!("self.{self}.to_string({variant})")
if extension.is_sign_extended() {
let variant = extension.generate_variant();
format!("self.{self}.to_string({variant})")
} else {
format!("self.{self}.to_string()")
}
}
r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => match self.generate_size() {
Some(size) => format!("self.{self}.to_string({size})"),
Expand Down Expand Up @@ -120,7 +138,6 @@ impl dsl::Extension {
SignExtendWord => "Extension::SignExtendWord",
SignExtendLong => "Extension::SignExtendLong",
SignExtendQuad => "Extension::SignExtendQuad",
ZeroExtend => "Extension::ZeroExtend",
}
}
}
86 changes: 81 additions & 5 deletions cranelift/assembler-x64/src/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! throughout this crate to avoid depending on the `arbitrary` crate
//! unconditionally (use the `fuzz` feature instead).
use crate::{AsReg, Gpr, Inst, NonRspGpr, Registers, Simm32, Simm32PlusKnownOffset};
use crate::{AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Gpr, Inst, NonRspGpr, Registers};
use arbitrary::{Arbitrary, Result, Unstructured};
use capstone::{arch::x86, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, Capstone};

Expand All @@ -21,10 +21,11 @@ pub fn roundtrip(inst: &Inst<FuzzRegs>) {
let assembled = assemble(inst);
let expected = disassemble(&assembled);

// Check that our pretty-printed output matches the known-good output.
// Check that our pretty-printed output matches the known-good output. Trim
// off the instruction offset first.
let expected = expected.split_once(' ').unwrap().1;
let actual = inst.to_string();
if expected != actual {
if expected != actual && expected != replace_signed_immediates(&actual) {
println!("> {inst}");
println!(" debug: {inst:x?}");
println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
Expand Down Expand Up @@ -70,6 +71,81 @@ fn pretty_print_hexadecimal(hex: &[u8]) -> String {
s
}

/// See `replace_signed_immediates`.
macro_rules! hex_print_signed_imm {
($hex:expr, $from:ty => $to:ty) => {{
#[allow(clippy::cast_possible_wrap)]
let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to;
let mut simm = String::new();
if imm < 0 {
simm.push_str("-");
}
let abs = match imm.checked_abs() {
Some(i) => i,
None => <$to>::MIN,
};
if imm > -10 && imm < 10 {
simm.push_str(&format!("{:x}", abs));
} else {
simm.push_str(&format!("0x{:x}", abs));
}
simm
}};
}

/// Replace signed immediates in the disassembly with their unsigned hexadecimal
/// equivalent. This is only necessary to match `capstone`'s complex
/// pretty-printing rules; e.g. `capstone` will:
/// - omit the `0x` prefix when printing `0x0` as `0`.
/// - omit the `0x` prefix when print small values (less than 10)
/// - print negative values as `-0x...` (signed hex) instead of `0xff...`
/// (normal hex)
fn replace_signed_immediates(dis: &str) -> std::borrow::Cow<str> {
match dis.find('$') {
None => dis.into(),
Some(idx) => {
let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'.
let (_, rest) = chomp("-", rest); // Skip the '-' if it's there.
let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there.
let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
let (hex, rest) = rest.split_at(n); // Split at next non-hex character.
let simm = match hex.len() {
1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
4 => hex_print_signed_imm!(hex, u16 => i16),
8 => hex_print_signed_imm!(hex, u32 => i32),
16 => hex_print_signed_imm!(hex, u64 => i64),
_ => panic!("unexpected length for hex: {hex}"),
};
format!("{prefix}{simm}{rest}").into()
}
}
}

// See `replace_signed_immediates`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
if s.starts_with(pat) {
s.split_at(pat.len())
} else {
("", s)
}
}

#[test]
fn replace() {
assert_eq!(
replace_signed_immediates("andl $0xffffff9a, %r11d"),
"andl $-0x66, %r11d"
);
assert_eq!(
replace_signed_immediates("xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)"),
"xorq $-0x44, 0x7f139ecc(%r9)"
);
assert_eq!(
replace_signed_immediates("subl $0x3ca77a19, -0x1a030f40(%r14)"),
"subl $0x3ca77a19, -0x1a030f40(%r14)"
);
}

/// Fuzz-specific registers.
///
/// For the fuzzer, we do not need any fancy register types; see [`FuzzReg`].
Expand Down Expand Up @@ -100,11 +176,11 @@ impl AsReg for FuzzReg {
}
}

impl Arbitrary<'_> for Simm32PlusKnownOffset {
impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
// For now, we don't generate offsets (TODO).
Ok(Self {
simm32: Simm32::arbitrary(u)?,
simm32: AmodeOffset::arbitrary(u)?,
offset: None,
})
}
Expand Down
Loading

0 comments on commit b0b5d8f

Please sign in to comment.