xpat
This commit is contained in:
@@ -7,7 +7,8 @@ members = [
|
|||||||
"sub/libm",
|
"sub/libm",
|
||||||
"sub/pe",
|
"sub/pe",
|
||||||
"sub/winu",
|
"sub/winu",
|
||||||
"sub/_macros"
|
"sub/_macros",
|
||||||
|
"sub/xpat",
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
@@ -16,3 +17,4 @@ sub_libm = { path = "sub/libm" }
|
|||||||
sub_pe = { path = "sub/pe" }
|
sub_pe = { path = "sub/pe" }
|
||||||
sub_winu = { path = "sub/winu" }
|
sub_winu = { path = "sub/winu" }
|
||||||
sub_macros = { path = "sub/_macros" }
|
sub_macros = { path = "sub/_macros" }
|
||||||
|
sub_xpat = { path = "sub/xpat" }
|
||||||
@@ -1,6 +1,12 @@
|
|||||||
|
#![allow(unused)]
|
||||||
|
|
||||||
mod from_repr;
|
mod from_repr;
|
||||||
|
mod patterns;
|
||||||
|
|
||||||
|
#[proc_macro]
|
||||||
|
pub fn pattern(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||||
|
patterns::proc_pattern(input)
|
||||||
|
}
|
||||||
|
|
||||||
#[proc_macro_derive(FromRepr)]
|
#[proc_macro_derive(FromRepr)]
|
||||||
pub fn derive_from_repr(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
pub fn derive_from_repr(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
|
||||||
|
|||||||
530
sub/_macros/src/patterns.rs
Normal file
530
sub/_macros/src/patterns.rs
Normal file
@@ -0,0 +1,530 @@
|
|||||||
|
use core::{cmp, fmt, mem, str};
|
||||||
|
use proc_macro::{Literal, TokenStream, TokenTree};
|
||||||
|
|
||||||
|
/// Compile time pattern parser.
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// const PATTERN: &[pelite::pattern::Atom] = pattern!("pattern string");
|
||||||
|
/// ```
|
||||||
|
pub fn proc_pattern(input: TokenStream) -> TokenStream {
|
||||||
|
let input = input.into_iter().collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let string = match &input[..] {
|
||||||
|
[TokenTree::Literal(lit)] => parse_str_literal(&lit),
|
||||||
|
_ => panic!("expected a single string literal to parse"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let pattern = match parse(&string) {
|
||||||
|
Ok(pattern) => pattern,
|
||||||
|
Err(err) => panic!("invalid pattern syntax: {}", err),
|
||||||
|
};
|
||||||
|
|
||||||
|
format!("{{ use x::xpat::Atom::*; &{:?} as x::Pattern }}", pattern).parse().unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts the contents of a plain (non-raw) string literal token.
///
/// Works on the literal's text as returned by `Literal::to_string`: the text
/// must begin with `"`, and everything up to the closing unescaped `"` is copied
/// into the result with the supported escapes (`\\`, `\'`, `\"`, `\t`, `\r`,
/// `\n`) decoded. Anything after the closing quote is ignored.
///
/// # Panics
///
/// Panics when the text does not start with `"`, when it ends before the
/// closing quote (including in the middle of an escape), on `\u` escapes, and
/// on any other escape sequence not listed above.
fn parse_str_literal(input: &Literal) -> String {
    let input = input.to_string();
    let mut chars = input.chars();
    if chars.next() != Some('"') {
        panic!("expected string literal starting with a `\"` and no extraneous whitespace");
    }

    // The decoded string is never longer than the literal's source text.
    let mut string = String::with_capacity(input.len());
    loop {
        let chr = match chars.next() {
            Some('\\') => match chars.next() {
                Some('\\') => '\\',
                Some('\'') => '\'',
                Some('\"') => '\"',
                Some('t') => '\t',
                Some('r') => '\r',
                Some('n') => '\n',
                Some('u') => panic!("unicode escape sequence not supported"),
                Some(chr) => panic!("unknown escape sequence: {}", chr),
                None => panic!("unexpected end of string literal inside an escape sequence"),
            },
            Some('"') => break,
            Some(chr) => chr,
            None => panic!("unexpected end of string literal, missing `\"` terminator?"),
        };
        string.push(chr);
    }
    string
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Special skip value indicating that the platform pointer size should be used instead.
|
||||||
|
pub(crate) const PTR_SKIP: u8 = 0;
|
||||||
|
|
||||||
|
/// Pattern parsing error.
|
||||||
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||||
|
pub struct ParsePatError {
|
||||||
|
kind: PatError,
|
||||||
|
position: usize,
|
||||||
|
}
|
||||||
|
impl fmt::Display for ParsePatError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of syntax error found while parsing a pattern.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PatError {
    /// A hex digit (or nibble wildcard) was not followed by a second one.
    UnpairedHexDigit,
    /// A character that is not part of the pattern syntax.
    UnknownChar,
    /// A bound inside `[...]` is too large.
    ManyOverflow,
    /// The lower bound inside `[...]` is not strictly less than the upper bound.
    ManyRange,
    /// Malformed `[...]` syntax.
    ManyInvalid,
    /// Too many save slots were requested.
    SaveOverflow,
    /// Curly braces are not balanced.
    StackError,
    /// An opening curly brace does not directly follow a jump symbol.
    StackInvalid,
    /// A double quoted string is missing its closing quote.
    UnclosedQuote,
    /// Missing or invalid operand after `@`.
    AlignedOperand,
    /// Missing or invalid operand after `=`.
    CheckOperand,
    /// Missing or invalid operand size after `i` or `u`.
    ReadOperand,
    /// Parentheses or pipes are not balanced.
    SubPattern,
    /// An alternative inside parentheses spans too many atoms.
    SubOverflow,
    /// Both nibbles of a byte were given as wildcards.
    DoubleNibble,
}

impl PatError {
    /// Returns a short, human readable description of the error kind.
    fn to_str(self) -> &'static str {
        match self {
            PatError::UnpairedHexDigit => "unpaired hex digit",
            PatError::UnknownChar => "unknown character",
            PatError::ManyOverflow => "many range exceeded",
            PatError::ManyRange => "many bounds nonsensical",
            PatError::ManyInvalid => "many invalid syntax",
            PatError::SaveOverflow => "save store overflow",
            PatError::StackError => "stack unbalanced",
            PatError::StackInvalid => "stack must follow jump",
            PatError::UnclosedQuote => "string missing end quote",
            PatError::AlignedOperand => "aligned operand error",
            // Previously reported the aligned operand message by mistake.
            PatError::CheckOperand => "check operand error",
            PatError::ReadOperand => "read operand error",
            PatError::SubPattern => "sub pattern error",
            PatError::SubOverflow => "sub pattern too large",
            PatError::DoubleNibble => "unpaired nibble wildcard",
        }
    }
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
|
include!("../../xpat/src/atoms.rs");
|
||||||
|
|
||||||
|
/// Pattern parser.
|
||||||
|
///
|
||||||
|
/// # Remarks
|
||||||
|
///
|
||||||
|
/// Following are examples of the pattern syntax.
|
||||||
|
/// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// 55 89 e5 83 ? ec
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
|
||||||
|
///
|
||||||
|
/// Note that a single question mark matches a whole byte. The syntax to mask part of a byte is not yet available.
|
||||||
|
///
|
||||||
|
/// Spaces (code point 32) are completely optional and carry no semantic meaning, their purpose is to visually group things together.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// b9 ' 37 13 00 00
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Single quotes are used as a bookmarks, to save the current cursor rva in the save array passed to the scanner.
|
||||||
|
///
|
||||||
|
/// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
|
||||||
|
/// This power really comes to life with the capability to follow relative and absolute references.
|
||||||
|
///
|
||||||
|
/// The first entry in the save array is reserved for the rva where the pattern was matched.
|
||||||
|
/// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// b8 [16] 50 [13-42] ff
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Pairs of decimal numbers separated by a hypen in square brackets indicate the lower and upper bound of number of bytes to skip.
|
||||||
|
/// The scanner is non greedy and considers the first match while skipping as little as possible.
|
||||||
|
///
|
||||||
|
/// A single decimal number in square brackets without hypens is a fixed size jump, equivalent to writing that number of consecutive question marks.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// 31 c0 74 % ' c3
|
||||||
|
/// e8 $ ' 31 c0 c3
|
||||||
|
/// 68 * ' 31 c0 c3
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// These symbols are used to follow; a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
|
||||||
|
///
|
||||||
|
/// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
|
||||||
|
///
|
||||||
|
/// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// b8 * "STRING" 00
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// String literals appear in double quotes and will be matched as UTF-8.
|
||||||
|
///
|
||||||
|
/// Escape sequences are not supported, switch back to matching with hex digits as needed.
|
||||||
|
/// For UTF-16 support, you are welcome to send a PR.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// e8 $ { ' } 83 f0 5c c3
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Curly braces must follow a jump symbol (see above).
|
||||||
|
///
|
||||||
|
/// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
|
||||||
|
/// After the pattern successfully matched, the cursor returns to before the jump was followed.
|
||||||
|
/// The bytes defining the jump are skipped and matching continues again from here.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// e8 $ @4
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Checks that the cursor is aligned at this point in the scan.
|
||||||
|
/// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// e8 i1 a0 u4
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
|
||||||
|
///
|
||||||
|
/// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
|
||||||
|
/// This means the values are sign- or zero- extended respectively before being stored.
|
||||||
|
/// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
|
||||||
|
///
|
||||||
|
/// The cursor is advanced by the size of the operand.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Parentheses indicate alternate subpatterns separated by a pipe character.
|
||||||
|
///
|
||||||
|
/// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
|
||||||
|
pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
|
||||||
|
let mut result = Vec::with_capacity(pat.len() / 2);
|
||||||
|
let mut pat_end = pat;
|
||||||
|
match parse_helper(&mut pat_end, &mut result) {
|
||||||
|
Ok(()) => Ok(result),
|
||||||
|
Err(kind) => {
|
||||||
|
let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize;
|
||||||
|
Err(ParsePatError { kind, position })
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// This is preferable but currently limited by macro rules...
|
||||||
|
// pub use crate::pattern as parse;
|
||||||
|
fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
|
||||||
|
result.push(Atom::Save(0));
|
||||||
|
let mut iter = pat.as_bytes().iter();
|
||||||
|
let mut save = 1;
|
||||||
|
let mut depth = 0;
|
||||||
|
#[derive(Default)]
|
||||||
|
struct SubPattern {
|
||||||
|
case: usize,
|
||||||
|
brks: Vec<usize>,
|
||||||
|
save: u8,
|
||||||
|
save_next: u8,
|
||||||
|
depth: u8,
|
||||||
|
}
|
||||||
|
let mut subs = Vec::<SubPattern>::new();
|
||||||
|
while let Some(mut chr) = iter.next().cloned() {
|
||||||
|
match chr {
|
||||||
|
// Follow signed 1 byte jump
|
||||||
|
b'%' => result.push(Atom::Jump1),
|
||||||
|
// Follow signed 4 byte jump
|
||||||
|
b'$' => result.push(Atom::Jump4),
|
||||||
|
// Follow pointer
|
||||||
|
b'*' => result.push(Atom::Ptr),
|
||||||
|
// Start recursive operator
|
||||||
|
b'{' => {
|
||||||
|
depth += 1;
|
||||||
|
// Must follow a jump operator and insert push before the jump
|
||||||
|
let atom = match result.last_mut() {
|
||||||
|
Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
|
||||||
|
Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
|
||||||
|
Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
|
||||||
|
_ => return Err(PatError::StackInvalid),
|
||||||
|
};
|
||||||
|
result.push(atom);
|
||||||
|
},
|
||||||
|
// End recursive operator
|
||||||
|
b'}' => {
|
||||||
|
// Unbalanced recursion
|
||||||
|
if depth <= 0 {
|
||||||
|
return Err(PatError::StackError);
|
||||||
|
}
|
||||||
|
depth -= 1;
|
||||||
|
result.push(Atom::Pop);
|
||||||
|
},
|
||||||
|
// Start subpattern
|
||||||
|
b'(' => {
|
||||||
|
subs.push(SubPattern::default());
|
||||||
|
let sub = subs.last_mut().unwrap();
|
||||||
|
// Keep the save and depth state
|
||||||
|
sub.save = save;
|
||||||
|
sub.depth = depth;
|
||||||
|
// Add a new case, update the case offset later
|
||||||
|
sub.case = result.len();
|
||||||
|
result.push(Atom::Case(0));
|
||||||
|
},
|
||||||
|
// Case subpattern
|
||||||
|
b'|' => {
|
||||||
|
// Should already have started a subpattern
|
||||||
|
let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
|
||||||
|
// Update the save state
|
||||||
|
sub.save_next = cmp::max(sub.save_next, save);
|
||||||
|
save = sub.save;
|
||||||
|
depth = sub.depth;
|
||||||
|
// Add a break of the previous subpattern
|
||||||
|
sub.brks.push(result.len());
|
||||||
|
result.push(Atom::Break(0));
|
||||||
|
// Add a new case of the next subpattern
|
||||||
|
let case_offset = result.len() - sub.case - 1;
|
||||||
|
if case_offset >= 256 {
|
||||||
|
return Err(PatError::SubOverflow);
|
||||||
|
}
|
||||||
|
result[sub.case] = Atom::Case(case_offset as u8);
|
||||||
|
sub.case = result.len();
|
||||||
|
result.push(Atom::Case(0));
|
||||||
|
},
|
||||||
|
// End subpattern
|
||||||
|
b')' => {
|
||||||
|
// Should already have started a subpattern
|
||||||
|
let sub = subs.pop().ok_or(PatError::SubPattern)?;
|
||||||
|
// Prepare for the next save
|
||||||
|
save = cmp::max(sub.save_next, save);
|
||||||
|
depth = sub.depth;
|
||||||
|
// Neutralize the last case, since there are no more
|
||||||
|
result[sub.case] = Atom::Nop;
|
||||||
|
// Fill in the breaks
|
||||||
|
for &brk in &sub.brks {
|
||||||
|
let brk_offset = result.len() - brk - 1;
|
||||||
|
if brk_offset >= 256 {
|
||||||
|
return Err(PatError::SubOverflow);
|
||||||
|
}
|
||||||
|
result[brk] = Atom::Break(brk_offset as u8);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Skip many operator
|
||||||
|
b'[' => {
|
||||||
|
// Parse the lower bound
|
||||||
|
let mut lower_bound = 0u32;
|
||||||
|
let mut at_least_one_char = false;
|
||||||
|
loop {
|
||||||
|
chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
|
||||||
|
match chr {
|
||||||
|
b'-' | b']' => break,
|
||||||
|
chr @ b'0'..=b'9' => {
|
||||||
|
at_least_one_char = true;
|
||||||
|
lower_bound = lower_bound * 10 + (chr - b'0') as u32;
|
||||||
|
if lower_bound >= 16384 {
|
||||||
|
return Err(PatError::ManyOverflow);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => return Err(PatError::ManyInvalid),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !at_least_one_char {
|
||||||
|
return Err(PatError::ManyInvalid);
|
||||||
|
}
|
||||||
|
// Turn the lower bound into skip ops
|
||||||
|
if lower_bound > 0 {
|
||||||
|
if lower_bound >= 256 {
|
||||||
|
result.push(Atom::Rangext((lower_bound >> 8) as u8));
|
||||||
|
}
|
||||||
|
result.push(Atom::Skip((lower_bound & 0xff) as u8));
|
||||||
|
}
|
||||||
|
// Second many part is optional
|
||||||
|
if chr == b']' {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Parse the upper bound
|
||||||
|
let mut upper_bound = 0u32;
|
||||||
|
loop {
|
||||||
|
chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
|
||||||
|
match chr {
|
||||||
|
b']' => break,
|
||||||
|
chr @ b'0'..=b'9' => {
|
||||||
|
upper_bound = upper_bound * 10 + (chr - b'0') as u32;
|
||||||
|
if upper_bound >= 16384 {
|
||||||
|
return Err(PatError::ManyOverflow);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => return Err(PatError::ManyInvalid),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Lower bound should be strictly less than the upper bound
|
||||||
|
if lower_bound < upper_bound {
|
||||||
|
let many_skip = upper_bound - lower_bound;
|
||||||
|
if many_skip >= 256 {
|
||||||
|
result.push(Atom::Rangext((many_skip >> 8) as u8));
|
||||||
|
}
|
||||||
|
result.push(Atom::Many((many_skip & 0xff) as u8));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Err(PatError::ManyRange);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Match a byte
|
||||||
|
b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
|
||||||
|
let mut mask = 0xFF;
|
||||||
|
|
||||||
|
// High nibble of the byte
|
||||||
|
let hi = if chr == b'.' { mask &= 0x0F;0 }
|
||||||
|
else if chr >= b'a' { chr - b'a' + 10 }
|
||||||
|
else if chr >= b'A' { chr - b'A' + 10 }
|
||||||
|
else { chr - b'0' };
|
||||||
|
|
||||||
|
chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
|
||||||
|
// Low nibble of the byte
|
||||||
|
let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
|
||||||
|
else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
|
||||||
|
else if chr >= b'0' && chr <= b'9' { chr - b'0' }
|
||||||
|
else if chr == b'.' { mask &= 0xF0; 0 }
|
||||||
|
else { return Err(PatError::UnpairedHexDigit); };
|
||||||
|
|
||||||
|
if mask == 0 { return Err(PatError::DoubleNibble); };
|
||||||
|
|
||||||
|
// mask out nibble
|
||||||
|
if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
|
||||||
|
|
||||||
|
// Add byte to the pattern
|
||||||
|
result.push(Atom::Byte((hi << 4) + lo));
|
||||||
|
},
|
||||||
|
// Match raw bytes
|
||||||
|
b'"' => {
|
||||||
|
loop {
|
||||||
|
if let Some(chr) = iter.next().cloned() {
|
||||||
|
if chr != b'"' {
|
||||||
|
result.push(Atom::Byte(chr));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Err(PatError::UnclosedQuote);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Save the cursor
|
||||||
|
b'\'' => {
|
||||||
|
// 'Limited' save space
|
||||||
|
if save >= u8::MAX {
|
||||||
|
return Err(PatError::SaveOverflow);
|
||||||
|
}
|
||||||
|
result.push(Atom::Save(save));
|
||||||
|
save += 1;
|
||||||
|
},
|
||||||
|
// Skip bytes
|
||||||
|
b'?' => {
|
||||||
|
// match result.last_mut() {
|
||||||
|
// Some(Atom::Skip(skip)) if *skip != PTR_SKIP && *skip < 127i8 => *skip += 1,
|
||||||
|
// _ => result.push(Atom::Skip(1)),
|
||||||
|
// };
|
||||||
|
// Coalescence skips together
|
||||||
|
if let Some(Atom::Skip(skip)) = result.last_mut() {
|
||||||
|
if *skip != PTR_SKIP && *skip < 255u8 {
|
||||||
|
*skip += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(Atom::Skip(1));
|
||||||
|
},
|
||||||
|
|
||||||
|
b'=' => {
|
||||||
|
let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
|
||||||
|
result.push( match op {
|
||||||
|
b'0'..=b'9' => Atom::Check(op - b'0'),
|
||||||
|
b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
|
||||||
|
b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
|
||||||
|
_ => return Err(PatError::CheckOperand)
|
||||||
|
});
|
||||||
|
},
|
||||||
|
b'@' => {
|
||||||
|
let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
|
||||||
|
result.push( match op {
|
||||||
|
b'0'..=b'9' => Atom::Aligned(op - b'0'),
|
||||||
|
b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
|
||||||
|
b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
|
||||||
|
_ => return Err(PatError::AlignedOperand)
|
||||||
|
});
|
||||||
|
},
|
||||||
|
b'i' => {
|
||||||
|
let atom = match iter.next().cloned() {
|
||||||
|
Some(b'1') => Atom::ReadI8(save),
|
||||||
|
Some(b'2') => Atom::ReadI16(save),
|
||||||
|
Some(b'4') => Atom::ReadI32(save),
|
||||||
|
_ => return Err(PatError::ReadOperand),
|
||||||
|
};
|
||||||
|
if save >= u8::MAX {
|
||||||
|
return Err(PatError::SaveOverflow);
|
||||||
|
}
|
||||||
|
save += 1;
|
||||||
|
result.push(atom);
|
||||||
|
},
|
||||||
|
b'u' => {
|
||||||
|
let atom = match iter.next().cloned() {
|
||||||
|
Some(b'1') => Atom::ReadU8(save),
|
||||||
|
Some(b'2') => Atom::ReadU16(save),
|
||||||
|
Some(b'4') => Atom::ReadU32(save),
|
||||||
|
_ => return Err(PatError::ReadOperand),
|
||||||
|
};
|
||||||
|
if save >= u8::MAX {
|
||||||
|
return Err(PatError::SaveOverflow);
|
||||||
|
}
|
||||||
|
save += 1;
|
||||||
|
result.push(atom);
|
||||||
|
},
|
||||||
|
b'z' => {
|
||||||
|
if save >= u8::MAX {
|
||||||
|
return Err(PatError::SaveOverflow);
|
||||||
|
}
|
||||||
|
result.push(Atom::Zero(save));
|
||||||
|
save += 1;
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
// Allow spaces as padding
|
||||||
|
b' ' | b'\n' | b'\r' | b'\t' => {},
|
||||||
|
// Everything else is illegal
|
||||||
|
_ => {
|
||||||
|
return Err(PatError::UnknownChar);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Converted from str originally, should be safe
|
||||||
|
*pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
|
||||||
|
}
|
||||||
|
// Check balanced stack operators
|
||||||
|
if depth != 0 {
|
||||||
|
return Err(PatError::StackError);
|
||||||
|
}
|
||||||
|
// Check if sub patterns are balanced
|
||||||
|
if subs.len() != 0 {
|
||||||
|
return Err(PatError::SubPattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove redundant atoms at the end
|
||||||
|
fn is_redundant(atom: &Atom) -> bool {
|
||||||
|
match atom {
|
||||||
|
| Atom::Skip(_)
|
||||||
|
| Atom::Rangext(_)
|
||||||
|
| Atom::Pop
|
||||||
|
| Atom::Many(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while result.last().map(is_redundant).unwrap_or(false) {
|
||||||
|
result.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
8
sub/xpat/Cargo.toml
Normal file
8
sub/xpat/Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
[package]
|
||||||
|
name = "sub_xpat"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
sub_core.workspace = true
|
||||||
|
sub_macros.workspace = true
|
||||||
83
sub/xpat/src/atoms.rs
Normal file
83
sub/xpat/src/atoms.rs
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
pub type Pattern<'l> = &'l[Atom];
|
||||||
|
|
||||||
|
/// Pattern atoms.
///
/// Nonsensical arguments are silently ignored by the scanner.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Atom {
    /// Match exactly one byte.
    Byte(u8),
    /// Store the cursor in the save array at the given index.
    Save(u8),
    /// After a later `Pop`, resume matching at the current cursor plus the argument.
    Push(u8),
    /// Pop the cursor from the stack and continue matching.
    Pop,
    /// Set the mask applied to the next byte match.
    Fuzzy(u8),
    /// Skip a fixed number of bytes.
    Skip(u8),
    /// Rewind the cursor by a fixed number of bytes.
    Back(u8),
    /// Extend the range of push, skip, back and many by `argument * 256`.
    Rangext(u8),
    /// Look for the next pattern at most a certain number of bytes ahead.
    Many(u8),
    /// Follow a signed 1 byte jump.
    ///
    /// The byte under the cursor is read and sign extended; it plus 1 is added to the cursor and matching continues.
    Jump1,
    /// Follow a signed 4 byte jump.
    ///
    /// The dword under the cursor is read; it plus 4 is added to the cursor and matching continues.
    Jump4,
    /// Follow an absolute pointer.
    ///
    /// The pointer under the cursor is read and translated to an RVA, which becomes the cursor, and matching continues.
    ///
    /// If the translation to an RVA fails, matching fails immediately.
    Ptr,
    /// Follow a position independent reference.
    ///
    /// The dword under the cursor is read and added to the saved cursor of the given slot, and matching continues.
    Pir(u8),
    /// Compare the cursor with the value in the given save slot; fail if they differ.
    Check(u8),
    /// Check that the cursor is aligned to `(1 << value)`.
    Aligned(u8),
    /// Read and sign-extend the byte under the cursor, write it to the given slot and advance the cursor by 1.
    ReadI8(u8),
    /// Read and zero-extend the byte under the cursor, write it to the given slot and advance the cursor by 1.
    ReadU8(u8),
    /// Read and sign-extend the word under the cursor, write it to the given slot and advance the cursor by 2.
    ReadI16(u8),
    /// Read and zero-extend the word under the cursor, write it to the given slot and advance the cursor by 2.
    ReadU16(u8),
    /// Read the dword under the cursor, write it to the given slot and advance the cursor by 4.
    ReadI32(u8),
    /// Read the dword under the cursor, write it to the given slot and advance the cursor by 4.
    ReadU32(u8),
    /// Write zero to the given save slot.
    Zero(u8),
    /// Set a retry point for when matching fails.
    ///
    /// On failure the cursor is restored and matching starts again, skipping _N_ atoms.
    Case(u8),
    /// Continue matching after a case atom, skipping the next _N_ atoms.
    Break(u8),
    /// Null instruction, used to make the parser easier to write.
    Nop,
}

impl Atom {
    /// Returns one more than the highest save slot referenced by any atom in
    /// `pat`, or 0 when no atom references a save slot.
    pub fn save_len(pat: &[Atom]) -> usize {
        let mut len = 0;
        for atom in pat {
            let slot = match *atom {
                Atom::Save(slot)
                | Atom::Pir(slot)
                | Atom::Check(slot)
                | Atom::Zero(slot)
                | Atom::ReadI8(slot)
                | Atom::ReadI16(slot)
                | Atom::ReadI32(slot)
                | Atom::ReadU8(slot)
                | Atom::ReadU16(slot)
                | Atom::ReadU32(slot) => slot,
                _ => continue,
            };
            len = len.max(usize::from(slot) + 1);
        }
        len
    }
}
|
||||||
28
sub/xpat/src/lib.rs
Normal file
28
sub/xpat/src/lib.rs
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#![no_std] #![allow(unused)]
|
||||||
|
|
||||||
|
pub mod atoms {
|
||||||
|
include!("atoms.rs");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod scannable;
|
||||||
|
pub mod scanner;
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// Export Preludes:
|
||||||
|
//
|
||||||
|
|
||||||
|
pub mod prelude {
|
||||||
|
pub use sub_macros::pattern;
|
||||||
|
pub use crate::atoms::Pattern;
|
||||||
|
pub use crate::scanner::Scanner;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod public {
|
||||||
|
pub use crate::atoms::Atom;
|
||||||
|
pub use crate::scannable::Scannable;
|
||||||
|
pub use crate::scanner::{
|
||||||
|
exec, scan_for_aob, make_aob
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
34
sub/xpat/src/scannable.rs
Normal file
34
sub/xpat/src/scannable.rs
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
use core::ops::Range;
|
||||||
|
|
||||||
|
/// A source of bytes that can be read by address.
pub trait Scannable {
    /// Returns the total address bounds of this source.
    fn range(&self) -> Range<usize>;

    /// Returns the chunk at `address`, or `None` when there is none.
    fn chunk_at(&self, address: usize) -> Option<&[u8]>;

    /// Returns the next chunk for the given address, or `None` when there are
    /// no more chunks.
    fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>;
}
|
||||||
|
|
||||||
|
|
||||||
|
impl Scannable for [u8] {
|
||||||
|
fn range(&self) -> Range<usize> { 0..self.len() }
|
||||||
|
fn chunk_at(&self, address: usize) -> Option<&[u8]> {
|
||||||
|
self.get(address..)
|
||||||
|
}
|
||||||
|
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In case you want to scan with a specific address
|
||||||
|
impl Scannable for (usize, &[u8]) {
|
||||||
|
fn range(&self) -> Range<usize> { self.0..(self.0 + self.1.len()) }
|
||||||
|
fn chunk_at(&self, address: usize) -> Option<&[u8]> {
|
||||||
|
match address.overflowing_sub(self.0) {
|
||||||
|
(address, false) => self.1.get(address..),
|
||||||
|
(_, true) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None }
|
||||||
|
}
|
||||||
418
sub/xpat/src/scanner.rs
Normal file
418
sub/xpat/src/scanner.rs
Normal file
@@ -0,0 +1,418 @@
|
|||||||
|
use core::ops::{Range, RangeBounds, Bound};
|
||||||
|
use crate::atoms::{Pattern, Atom};
|
||||||
|
use crate::scannable::Scannable;
|
||||||
|
use sub_core::{pod::Pod};
|
||||||
|
|
||||||
|
const SKIP_VA: u32 = size_of::<usize>() as u32;
|
||||||
|
|
||||||
|
pub struct Scanner<'a, S: Scannable + ?Sized> {
|
||||||
|
/// the binary to be scanned
|
||||||
|
bin: &'a S,
|
||||||
|
|
||||||
|
/// the pattern
|
||||||
|
pat: Pattern<'a>,
|
||||||
|
|
||||||
|
/// the range to search for the pattern in
|
||||||
|
range: Range<usize>,
|
||||||
|
|
||||||
|
/// the current cursor position
|
||||||
|
cursor: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, S: Scannable + ?Sized> Scanner<'a, S> {
|
||||||
|
|
||||||
|
pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds<usize>) -> Self {
|
||||||
|
let range = limit_range(bin, r);
|
||||||
|
let cursor = range.start;
|
||||||
|
Self { bin, pat, range, cursor }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next(&mut self, saves: &mut [usize]) -> bool {
|
||||||
|
let mut aob = <[u8; 0x10] as Pod>::uninit();
|
||||||
|
let aob = make_aob(self.pat, &mut aob);
|
||||||
|
|
||||||
|
match !aob.is_empty() {
|
||||||
|
true => {
|
||||||
|
let upper_limit = self.range.end;
|
||||||
|
while let Some(address) = scan_for_aob(self.bin, self.cursor..upper_limit, aob) {
|
||||||
|
self.cursor = address + 1;
|
||||||
|
if exec(self.bin, address, self.pat, saves, self.range.clone()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
false => {
|
||||||
|
while self.range.contains(&self.cursor) {
|
||||||
|
let current_cursor = self.cursor;
|
||||||
|
self.cursor += 1;
|
||||||
|
if exec(self.bin, current_cursor, self.pat, saves, self.range.clone()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn exec<Binary: Scannable + ?Sized>(
|
||||||
|
bin: &Binary,
|
||||||
|
address: usize,
|
||||||
|
pattern: Pattern,
|
||||||
|
saves: &mut [usize],
|
||||||
|
range: Range<usize>,
|
||||||
|
) -> bool {
|
||||||
|
|
||||||
|
let mut cursor = address;
|
||||||
|
let mut pc = 0;
|
||||||
|
|
||||||
|
// pattern state
|
||||||
|
let mut mask = 0xff;
|
||||||
|
let mut ext_range = 0u32;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn read<B: Scannable + ?Sized, T: Sized + Copy>(bin: &B, address: usize) -> Option<T> {
|
||||||
|
let slice = bin.chunk_at(address)?;
|
||||||
|
if slice.len() >= size_of::<T>() {
|
||||||
|
return Some(unsafe { (slice.as_ptr() as *const T).read_unaligned() });
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(atom) = pattern.get(pc).cloned() {
|
||||||
|
pc += 1;
|
||||||
|
match atom {
|
||||||
|
|
||||||
|
// Compare bytes
|
||||||
|
Atom::Byte(pat_byte) => {
|
||||||
|
let Some(byte) = read::<_, u8>(bin, cursor) else { return false; };
|
||||||
|
if byte & mask != pat_byte & mask { return false; }
|
||||||
|
cursor += 1;
|
||||||
|
mask = 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// save the current address
|
||||||
|
Atom::Save(slot_idx) => {
|
||||||
|
if let Some(slot) = saves.get_mut(slot_idx as usize) {
|
||||||
|
*slot = cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Push(skip) => {
|
||||||
|
let skip = ext_range + skip as u32;
|
||||||
|
let skip = if skip == 0 { SKIP_VA } else { skip };
|
||||||
|
|
||||||
|
// start running the pattern from pc...
|
||||||
|
if !exec(bin, cursor, &pattern[pc..], saves, range.clone()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cursor = cursor.wrapping_add(skip as usize);
|
||||||
|
mask = 0xff;
|
||||||
|
ext_range = 0;
|
||||||
|
|
||||||
|
// Iterate forward in the pattern looking for the POP for this push...
|
||||||
|
let mut counter = 1;
|
||||||
|
while counter != 0 {
|
||||||
|
// keep incrementing the pc so the next atom will be the one after pop
|
||||||
|
match pattern.get(pc) {
|
||||||
|
Some(Atom::Push(_)) => counter += 1,
|
||||||
|
Some(Atom::Pop) => counter -= 1,
|
||||||
|
None => return true,
|
||||||
|
_ => (/**/)
|
||||||
|
}
|
||||||
|
pc += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Pop => {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Fuzzy(pat_mask) => {
|
||||||
|
mask = pat_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Skip(skip) => {
|
||||||
|
let skip = ext_range + skip as u32;
|
||||||
|
let skip = if skip == 0 { SKIP_VA } else { skip };
|
||||||
|
cursor = cursor.wrapping_add(skip as usize);
|
||||||
|
ext_range = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Back(back) => {
|
||||||
|
let rewind = ext_range + back as u32;
|
||||||
|
let rewind = if rewind == 0 { SKIP_VA } else { rewind };
|
||||||
|
cursor = cursor.wrapping_sub(rewind as usize);
|
||||||
|
ext_range = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Rangext(ext) => {
|
||||||
|
ext_range = ext as u32 * 256;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Many(limit) => {
|
||||||
|
let limit = ext_range + limit as u32;
|
||||||
|
return exec_many(bin, cursor, &pattern[pc..], saves, range, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Jump1 => {
|
||||||
|
let Some(sbyte) = read::<_, i8>(bin, cursor) else { return false };
|
||||||
|
cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Jump4 => {
|
||||||
|
let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
|
||||||
|
cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Ptr => {
|
||||||
|
let Some(sptr) = read::<_, usize>(bin, cursor) else { return false };
|
||||||
|
cursor = sptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Pir(slot) => {
|
||||||
|
let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
|
||||||
|
let base = saves.get(slot as usize).cloned().unwrap_or(cursor);
|
||||||
|
cursor = base.wrapping_add(sdword as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Check(slot) => {
|
||||||
|
if let Some(&rva) = saves.get(slot as usize) {
|
||||||
|
if rva != cursor { return false; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::Aligned(align) => {
|
||||||
|
if cursor & ((1 << align) - 1) != 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Atom::ReadU8(slot) => {
|
||||||
|
let Some(value) = read::<_, u8>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::ReadI8(slot) => {
|
||||||
|
let Some(value) = read::<_, i8>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::ReadU16(slot) => {
|
||||||
|
let Some(value) = read::<_, u16>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::ReadI16(slot) => {
|
||||||
|
let Some(value) = read::<_, i16>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::ReadU32(slot) => {
|
||||||
|
let Some(value) = read::<_, u32>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::ReadI32(slot) => {
|
||||||
|
let Some(value) = read::<_, i32>(bin, cursor) else { return false };
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||||
|
}
|
||||||
|
Atom::Zero(slot) => {
|
||||||
|
if let Some(slot) = saves.get_mut(slot as usize) {
|
||||||
|
*slot = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Case(next) => {
|
||||||
|
if exec(bin, cursor, pattern, saves, range.clone()) {
|
||||||
|
// same as Push/Pop except we add the next from the break to the pc.
|
||||||
|
let mut counter = 1;
|
||||||
|
loop {
|
||||||
|
pc += 1;
|
||||||
|
match pattern.get(pc) {
|
||||||
|
Some(Atom::Case(_)) => counter += 1,
|
||||||
|
Some(Atom::Break(next)) => {
|
||||||
|
counter -= 1;
|
||||||
|
if counter == 0 {
|
||||||
|
pc += *next as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => return true,
|
||||||
|
_ => (/**/)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// if the case fails go to the location defined by next
|
||||||
|
pc += next as usize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Break(_next) => {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Atom::Nop => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn exec_many<Binary: Scannable + ?Sized >(
|
||||||
|
bin: &Binary,
|
||||||
|
address: usize,
|
||||||
|
pattern: Pattern,
|
||||||
|
saves: &mut [usize],
|
||||||
|
range: Range<usize>,
|
||||||
|
limit: u32,
|
||||||
|
) -> bool {
|
||||||
|
let mut aob = <[u8; 0x10] as Pod>::uninit();
|
||||||
|
let aob = make_aob(pattern, &mut aob);
|
||||||
|
|
||||||
|
let Some(chunk) = bin.chunk_at(address) else {
|
||||||
|
// pattern fails before we even try (out of bounds)
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
match !aob.is_empty() {
|
||||||
|
true => {
|
||||||
|
let upper_limit = address + limit as usize;
|
||||||
|
let mut cursor = address;
|
||||||
|
while let Some(address) = scan_for_aob(bin, cursor..upper_limit, aob) {
|
||||||
|
cursor = address;
|
||||||
|
if exec(bin, cursor, pattern, saves, range.clone()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
cursor += 1;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
false => {
|
||||||
|
// try to reduce the limit just in-case we can squeeze some perf out of it
|
||||||
|
for i in 0..(limit as usize).min(chunk.len()) {
|
||||||
|
if exec(bin, address + i, pattern, saves, range.clone()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn scan_for_aob<Binary: Scannable + ?Sized>(
|
||||||
|
bin: &Binary,
|
||||||
|
range: Range<usize>,
|
||||||
|
aob: &[u8],
|
||||||
|
) -> Option<usize> {
|
||||||
|
let mut address = range.start;
|
||||||
|
let upper_bounds = range.end;
|
||||||
|
|
||||||
|
|
||||||
|
while address < upper_bounds {
|
||||||
|
|
||||||
|
// get the current chunk for the given address
|
||||||
|
let chunk = match bin.chunk_at(address) {
|
||||||
|
Some(chunk) => chunk,
|
||||||
|
|
||||||
|
// the address is out of bounds, try to shift the address so its back in b ounds
|
||||||
|
None => match bin.next_chunk(address) {
|
||||||
|
|
||||||
|
// the next chunk is in bounds so we will just correct the address and use that chunk instead
|
||||||
|
Some((naddr, nchunk)) if naddr < upper_bounds => {
|
||||||
|
address = naddr;
|
||||||
|
nchunk
|
||||||
|
}
|
||||||
|
|
||||||
|
// no hope, give up
|
||||||
|
_ => return None,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// try to find the aob in the current chunk
|
||||||
|
if let Some(offset) = chunk.windows(aob.len())
|
||||||
|
.take(upper_bounds.saturating_sub(address)).position(|c| c == aob) {
|
||||||
|
// we got a hit, return it
|
||||||
|
return Some(address + offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// the AOB was not found in the current chunk, now check if its contiguous between chunks:
|
||||||
|
if let Some((naddr, nchunk)) = bin.next_chunk(address) {
|
||||||
|
|
||||||
|
// next chunk is out of bounds, give up
|
||||||
|
if naddr - aob.len() > upper_bounds { return None }
|
||||||
|
|
||||||
|
// if chunks are contiguous and the aob is greater than one byte,
|
||||||
|
// check if the aob is on a chunk border
|
||||||
|
if address + chunk.len() == naddr && aob.len() > 1 {
|
||||||
|
// check if the aob is between two chunks :)
|
||||||
|
for i in 1..aob.len()-1 {
|
||||||
|
let (p1, p2) = aob.split_at(i);
|
||||||
|
if chunk.ends_with(p1) && nchunk.starts_with(p2) {
|
||||||
|
// aob was found between two chunks
|
||||||
|
// return this address
|
||||||
|
return Some(address + chunk.len() - i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// start scanning the next chunk
|
||||||
|
let naddr = naddr - aob.len();
|
||||||
|
debug_assert!(naddr > address);
|
||||||
|
address = naddr;
|
||||||
|
} else {
|
||||||
|
return None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Limits a selected range into the range of the binary...
|
||||||
|
fn limit_range<Binary: Scannable + ?Sized>(
|
||||||
|
bin: &Binary,
|
||||||
|
range: impl RangeBounds<usize>,
|
||||||
|
) -> Range<usize> {
|
||||||
|
let bin_range = bin.range();
|
||||||
|
let start = match range.start_bound() {
|
||||||
|
Bound::Included(v) => bin_range.start.max(*v),
|
||||||
|
Bound::Excluded(v) => bin_range.start.max(v.saturating_add(1)),
|
||||||
|
Bound::Unbounded => bin_range.start,
|
||||||
|
};
|
||||||
|
let end = match range.end_bound() {
|
||||||
|
Bound::Included(v) => bin_range.end.min(v.saturating_add(1)),
|
||||||
|
Bound::Excluded(v) => bin_range.end.min(*v),
|
||||||
|
Bound::Unbounded => bin_range.end
|
||||||
|
};
|
||||||
|
start..end
|
||||||
|
}
|
||||||
|
|
||||||
|
/// builds an array of bytes from the start of the pattern.
|
||||||
|
pub fn make_aob<'b>(pattern: &[Atom], buffer: &'b mut [u8]) -> &'b [u8] {
|
||||||
|
let mut i = 0;
|
||||||
|
for atoms in pattern {
|
||||||
|
match atoms {
|
||||||
|
Atom::Zero(_) => (/* do nothing */),
|
||||||
|
Atom::Save(_) => (/* do nothing */),
|
||||||
|
Atom::Byte(b) => {
|
||||||
|
buffer[i] = *b;
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
if i >= buffer.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&buffer[..i]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -10,7 +10,7 @@ macros = ["sub_macros"]
|
|||||||
libm = ["sub_libm"]
|
libm = ["sub_libm"]
|
||||||
pe = ["sub_pe"]
|
pe = ["sub_pe"]
|
||||||
|
|
||||||
|
xpat = ["core", "sub_xpat", "macros"]
|
||||||
winuser = ["sub_winu", "pe", "sub_pe/windows"]
|
winuser = ["sub_winu", "pe", "sub_pe/windows"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
@@ -19,3 +19,4 @@ sub_libm = { workspace = true, optional = true}
|
|||||||
sub_pe = { workspace = true, optional = true }
|
sub_pe = { workspace = true, optional = true }
|
||||||
sub_winu = { workspace = true, optional = true }
|
sub_winu = { workspace = true, optional = true }
|
||||||
sub_macros = { workspace = true, optional = true }
|
sub_macros = { workspace = true, optional = true }
|
||||||
|
sub_xpat = { workspace = true, optional = true }
|
||||||
@@ -18,6 +18,7 @@ import!(sub_core, core, "core");
|
|||||||
import!(sub_libm, libm, "libm");
|
import!(sub_libm, libm, "libm");
|
||||||
import!(sub_pe, pe, "pe");
|
import!(sub_pe, pe, "pe");
|
||||||
import!(sub_winu, win, "winuser");
|
import!(sub_winu, win, "winuser");
|
||||||
|
import!(sub_xpat, xpat, "xpat");
|
||||||
|
|
||||||
/// the macro crate is a proc macro, so it is a bit different.
|
/// the macro crate is a proc macro, so it is a bit different.
|
||||||
#[cfg(feature = "macros")]
|
#[cfg(feature = "macros")]
|
||||||
|
|||||||
17
x/tests/test_xpat.rs
Normal file
17
x/tests/test_xpat.rs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Scans a small buffer for `E8 [0-4] BB` and checks that two successive
/// matches are reported, with save slot 0 holding 1 and then 3.
#[test]
pub fn test_pattern() {
    let pattern = x::pattern!("E8 [0-4] BB ");
    let haystack: &[u8] = &[0xAA, 0xE8, 0xBB, 0xE8, 0x00, 0xBB];

    let mut scanner = x::Scanner::new(haystack, pattern, ..);
    let mut slots = [0usize; 8];

    // each call to `next` must succeed and leave the expected value in slot 0
    for expected in [1usize, 3] {
        assert!(scanner.next(&mut slots));
        assert_eq!(slots[0], expected);
    }
}
|
||||||
Reference in New Issue
Block a user