diff --git a/Cargo.toml b/Cargo.toml index 1a59156..087d8b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,8 @@ members = [ "sub/libm", "sub/pe", "sub/winu", - "sub/_macros" + "sub/_macros", + "sub/xpat", ] [workspace.dependencies] @@ -16,3 +17,4 @@ sub_libm = { path = "sub/libm" } sub_pe = { path = "sub/pe" } sub_winu = { path = "sub/winu" } sub_macros = { path = "sub/_macros" } +sub_xpat = { path = "sub/xpat" } \ No newline at end of file diff --git a/sub/_macros/src/lib.rs b/sub/_macros/src/lib.rs index e0ad1d4..d712b46 100644 --- a/sub/_macros/src/lib.rs +++ b/sub/_macros/src/lib.rs @@ -1,6 +1,12 @@ +#![allow(unused)] mod from_repr; +mod patterns; +#[proc_macro] +pub fn pattern(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + patterns::proc_pattern(input) +} #[proc_macro_derive(FromRepr)] pub fn derive_from_repr(input: proc_macro::TokenStream) -> proc_macro::TokenStream { diff --git a/sub/_macros/src/patterns.rs b/sub/_macros/src/patterns.rs new file mode 100644 index 0000000..14dc055 --- /dev/null +++ b/sub/_macros/src/patterns.rs @@ -0,0 +1,530 @@ +use core::{cmp, fmt, mem, str}; +use proc_macro::{Literal, TokenStream, TokenTree}; + +/// Compile time pattern parser. +/// +/// ```ignore +/// const PATTERN: &[pelite::pattern::Atom] = pattern!("pattern string"); +/// ``` +pub fn proc_pattern(input: TokenStream) -> TokenStream { + let input = input.into_iter().collect::>(); + + let string = match &input[..] 
{ + [TokenTree::Literal(lit)] => parse_str_literal(&lit), + _ => panic!("expected a single string literal to parse"), + }; + + let pattern = match parse(&string) { + Ok(pattern) => pattern, + Err(err) => panic!("invalid pattern syntax: {}", err), + }; + + format!("{{ use x::xpat::Atom::*; &{:?} as x::Pattern }}", pattern).parse().unwrap() +} + +fn parse_str_literal(input: &Literal) -> String { + let input = input.to_string(); + let mut chars = input.chars(); + let mut string = String::new(); + if chars.next() != Some('"') { + panic!("expected string literal starting with a `\"` and no extraneous whitespace"); + } + loop { + let chr = match chars.next() { + Some('\\') => { + match chars.next() { + Some('\\') => '\\', + Some('\'') => '\'', + Some('\"') => '\"', + Some('t') => '\t', + Some('r') => '\r', + Some('n') => '\n', + Some('u') => panic!("unicode escape sequence not supported"), + Some(chr) => panic!("unknown escape sequence: {}", chr), + None => panic!(""), + } + }, + Some('"') => break, + Some(chr) => chr, + None => panic!("unexpected end of string literal, missing `\"` terminator?"), + }; + string.push(chr); + } + string +} + + +/// Special skip value to indicate to use platform pointer size instead. +pub(crate) const PTR_SKIP: u8 = 0; + +/// Pattern parsing error. 
+///
+/// Pairs the kind of syntax error with the byte offset into the pattern string at which
+/// parsing stopped.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct ParsePatError {
+    kind: PatError,
+    position: usize,
+}
+impl fmt::Display for ParsePatError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str())
+    }
+}
+
+/// The individual syntax errors the pattern parser can report.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum PatError {
+    UnpairedHexDigit,
+    UnknownChar,
+    ManyOverflow,
+    ManyRange,
+    ManyInvalid,
+    SaveOverflow,
+    StackError,
+    StackInvalid,
+    UnclosedQuote,
+    AlignedOperand,
+    CheckOperand,
+    ReadOperand,
+    SubPattern,
+    SubOverflow,
+    DoubleNibble
+}
+impl PatError {
+    /// Returns the human readable description used by the `Display` impl of `ParsePatError`.
+    fn to_str(self) -> &'static str {
+        match self {
+            PatError::UnpairedHexDigit => "unpaired hex digit",
+            PatError::UnknownChar => "unknown character",
+            PatError::ManyOverflow => "many range exceeded",
+            PatError::ManyRange => "many bounds nonsensical",
+            PatError::ManyInvalid => "many invalid syntax",
+            PatError::SaveOverflow => "save store overflow",
+            PatError::StackError => "stack unbalanced",
+            PatError::StackInvalid => "stack must follow jump",
+            PatError::UnclosedQuote => "string missing end quote",
+            PatError::AlignedOperand => "aligned operand error",
+            PatError::CheckOperand => "check operand error",
+            PatError::ReadOperand => "read operand error",
+            PatError::SubPattern => "sub pattern error",
+            PatError::SubOverflow => "sub pattern too large",
+            PatError::DoubleNibble => "unpaired nibble wildcard",
+        }
+    }
+}
+
+//----------------------------------------------------------------
+
+include!("../../xpat/src/atoms.rs");
+
+/// Pattern parser.
+///
+/// # Remarks
+///
+/// Following are examples of the pattern syntax.
+/// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
+///
+/// ```text
+/// 55 89 e5 83 ? ec
+/// ```
+///
+/// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
+///
+/// Note that a single question mark matches a whole byte. To mask only part of a byte, write a `.` in place of one hex digit:
+/// `4.` matches any byte from `40` to `4f`. Masking both nibbles (`..`) is an error, use `?` instead.
+///
+/// Whitespace (spaces, tabs and line breaks) is completely optional and carries no semantic meaning, its purpose is to visually group things together.
+///
+/// ```text
+/// b9 ' 37 13 00 00
+/// ```
+///
+/// Single quotes are used as bookmarks, to save the current cursor rva in the save array passed to the scanner.
+///
+/// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
+/// This power really comes to life with the capability to follow relative and absolute references.
+///
+/// The first entry in the save array is reserved for the rva where the pattern was matched.
+/// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
+///
+/// ```text
+/// b8 [16] 50 [13-42] ff
+/// ```
+///
+/// Pairs of decimal numbers separated by a hyphen in square brackets indicate the lower and upper bound of number of bytes to skip.
+/// The scanner is non greedy and considers the first match while skipping as little as possible.
+///
+/// A single decimal number in square brackets without hyphens is a fixed size jump, equivalent to writing that number of consecutive question marks.
+///
+/// ```text
+/// 31 c0 74 % ' c3
+/// e8 $ ' 31 c0 c3
+/// 68 * ' 31 c0 c3
+/// ```
+///
+/// These symbols are used to follow: a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
+///
+/// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
+///
+/// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
+///
+/// ```text
+/// b8 * "STRING" 00
+/// ```
+///
+/// String literals appear in double quotes and will be matched as UTF-8.
+///
+/// Escape sequences are not supported, switch back to matching with hex digits as needed.
+/// For UTF-16 support, you are welcome to send a PR.
+///
+/// ```text
+/// e8 $ { ' } 83 f0 5c c3
+/// ```
+///
+/// Curly braces must follow a jump symbol (see above).
+///
+/// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
+/// After the pattern successfully matched, the cursor returns to before the jump was followed.
+/// The bytes defining the jump are skipped and matching continues again from here.
+///
+/// ```text
+/// e8 $ @4
+/// ```
+///
+/// Checks that the cursor is aligned at this point in the scan.
+/// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
+///
+/// ```text
+/// e8 i1 a0 u4
+/// ```
+///
+/// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
+///
+/// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
+/// This means the values are sign- or zero- extended respectively before being stored.
+/// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
+///
+/// The cursor is advanced by the size of the operand.
+///
+/// ```text
+/// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
+/// ```
+///
+/// Parentheses indicate alternate subpatterns separated by a pipe character.
+///
+/// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
+///
+/// # Errors
+///
+/// Returns a [`ParsePatError`] carrying the kind of syntax error and the byte offset at which parsing stopped.
+pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
+    let mut result = Vec::with_capacity(pat.len() / 2);
+    let mut pat_end = pat;
+    match parse_helper(&mut pat_end, &mut result) {
+        Ok(()) => Ok(result),
+        Err(kind) => {
+            // `parse_helper` only ever replaces `pat_end` with a suffix of `pat`, so the number
+            // of consumed bytes is the offset of the error.
+            let position = pat.len() - pat_end.len();
+            Err(ParsePatError { kind, position })
+        },
+    }
+}
+// This is preferable but currently limited by macro rules...
+// pub use crate::pattern as parse; +fn parse_helper(pat: &mut &str, result: &mut Vec) -> Result<(), PatError> { + result.push(Atom::Save(0)); + let mut iter = pat.as_bytes().iter(); + let mut save = 1; + let mut depth = 0; + #[derive(Default)] + struct SubPattern { + case: usize, + brks: Vec, + save: u8, + save_next: u8, + depth: u8, + } + let mut subs = Vec::::new(); + while let Some(mut chr) = iter.next().cloned() { + match chr { + // Follow signed 1 byte jump + b'%' => result.push(Atom::Jump1), + // Follow signed 4 byte jump + b'$' => result.push(Atom::Jump4), + // Follow pointer + b'*' => result.push(Atom::Ptr), + // Start recursive operator + b'{' => { + depth += 1; + // Must follow a jump operator and insert push before the jump + let atom = match result.last_mut() { + Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)), + Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)), + Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)), + _ => return Err(PatError::StackInvalid), + }; + result.push(atom); + }, + // End recursive operator + b'}' => { + // Unbalanced recursion + if depth <= 0 { + return Err(PatError::StackError); + } + depth -= 1; + result.push(Atom::Pop); + }, + // Start subpattern + b'(' => { + subs.push(SubPattern::default()); + let sub = subs.last_mut().unwrap(); + // Keep the save and depth state + sub.save = save; + sub.depth = depth; + // Add a new case, update the case offset later + sub.case = result.len(); + result.push(Atom::Case(0)); + }, + // Case subpattern + b'|' => { + // Should already have started a subpattern + let sub = subs.last_mut().ok_or(PatError::SubPattern)?; + // Update the save state + sub.save_next = cmp::max(sub.save_next, save); + save = sub.save; + depth = sub.depth; + // Add a break of the previous subpattern + sub.brks.push(result.len()); + result.push(Atom::Break(0)); + // Add a new case of the next subpattern + let case_offset = result.len() - sub.case - 1; + if case_offset >= 256 
{ + return Err(PatError::SubOverflow); + } + result[sub.case] = Atom::Case(case_offset as u8); + sub.case = result.len(); + result.push(Atom::Case(0)); + }, + // End subpattern + b')' => { + // Should already have started a subpattern + let sub = subs.pop().ok_or(PatError::SubPattern)?; + // Prepare for the next save + save = cmp::max(sub.save_next, save); + depth = sub.depth; + // Neutralize the last case, since there are no more + result[sub.case] = Atom::Nop; + // Fill in the breaks + for &brk in &sub.brks { + let brk_offset = result.len() - brk - 1; + if brk_offset >= 256 { + return Err(PatError::SubOverflow); + } + result[brk] = Atom::Break(brk_offset as u8); + } + }, + // Skip many operator + b'[' => { + // Parse the lower bound + let mut lower_bound = 0u32; + let mut at_least_one_char = false; + loop { + chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?; + match chr { + b'-' | b']' => break, + chr @ b'0'..=b'9' => { + at_least_one_char = true; + lower_bound = lower_bound * 10 + (chr - b'0') as u32; + if lower_bound >= 16384 { + return Err(PatError::ManyOverflow); + } + }, + _ => return Err(PatError::ManyInvalid), + } + } + if !at_least_one_char { + return Err(PatError::ManyInvalid); + } + // Turn the lower bound into skip ops + if lower_bound > 0 { + if lower_bound >= 256 { + result.push(Atom::Rangext((lower_bound >> 8) as u8)); + } + result.push(Atom::Skip((lower_bound & 0xff) as u8)); + } + // Second many part is optional + if chr == b']' { + continue; + } + // Parse the upper bound + let mut upper_bound = 0u32; + loop { + chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?; + match chr { + b']' => break, + chr @ b'0'..=b'9' => { + upper_bound = upper_bound * 10 + (chr - b'0') as u32; + if upper_bound >= 16384 { + return Err(PatError::ManyOverflow); + } + }, + _ => return Err(PatError::ManyInvalid), + } + } + // Lower bound should be strictly less than the upper bound + if lower_bound < upper_bound { + let many_skip = upper_bound - lower_bound; 
+ if many_skip >= 256 { + result.push(Atom::Rangext((many_skip >> 8) as u8)); + } + result.push(Atom::Many((many_skip & 0xff) as u8)); + } + else { + return Err(PatError::ManyRange); + } + }, + // Match a byte + b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => { + let mut mask = 0xFF; + + // High nibble of the byte + let hi = if chr == b'.' { mask &= 0x0F;0 } + else if chr >= b'a' { chr - b'a' + 10 } + else if chr >= b'A' { chr - b'A' + 10 } + else { chr - b'0' }; + + chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?; + // Low nibble of the byte + let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 } + else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 } + else if chr >= b'0' && chr <= b'9' { chr - b'0' } + else if chr == b'.' { mask &= 0xF0; 0 } + else { return Err(PatError::UnpairedHexDigit); }; + + if mask == 0 { return Err(PatError::DoubleNibble); }; + + // mask out nibble + if mask != 0xFF { result.push(Atom::Fuzzy(mask)) } + + // Add byte to the pattern + result.push(Atom::Byte((hi << 4) + lo)); + }, + // Match raw bytes + b'"' => { + loop { + if let Some(chr) = iter.next().cloned() { + if chr != b'"' { + result.push(Atom::Byte(chr)); + } + else { + break; + } + } + else { + return Err(PatError::UnclosedQuote); + } + } + }, + // Save the cursor + b'\'' => { + // 'Limited' save space + if save >= u8::MAX { + return Err(PatError::SaveOverflow); + } + result.push(Atom::Save(save)); + save += 1; + }, + // Skip bytes + b'?' 
=> { + // match result.last_mut() { + // Some(Atom::Skip(skip)) if *skip != PTR_SKIP && *skip < 127i8 => *skip += 1, + // _ => result.push(Atom::Skip(1)), + // }; + // Coalescence skips together + if let Some(Atom::Skip(skip)) = result.last_mut() { + if *skip != PTR_SKIP && *skip < 255u8 { + *skip += 1; + continue; + } + } + result.push(Atom::Skip(1)); + }, + + b'=' => { + let op = iter.next().cloned().ok_or(PatError::CheckOperand)?; + result.push( match op { + b'0'..=b'9' => Atom::Check(op - b'0'), + b'A'..=b'Z' => Atom::Check(10 + (op - b'A')), + b'a'..=b'z' => Atom::Check(10 + (op - b'a')), + _ => return Err(PatError::CheckOperand) + }); + }, + b'@' => { + let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?; + result.push( match op { + b'0'..=b'9' => Atom::Aligned(op - b'0'), + b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')), + b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')), + _ => return Err(PatError::AlignedOperand) + }); + }, + b'i' => { + let atom = match iter.next().cloned() { + Some(b'1') => Atom::ReadI8(save), + Some(b'2') => Atom::ReadI16(save), + Some(b'4') => Atom::ReadI32(save), + _ => return Err(PatError::ReadOperand), + }; + if save >= u8::MAX { + return Err(PatError::SaveOverflow); + } + save += 1; + result.push(atom); + }, + b'u' => { + let atom = match iter.next().cloned() { + Some(b'1') => Atom::ReadU8(save), + Some(b'2') => Atom::ReadU16(save), + Some(b'4') => Atom::ReadU32(save), + _ => return Err(PatError::ReadOperand), + }; + if save >= u8::MAX { + return Err(PatError::SaveOverflow); + } + save += 1; + result.push(atom); + }, + b'z' => { + if save >= u8::MAX { + return Err(PatError::SaveOverflow); + } + result.push(Atom::Zero(save)); + save += 1; + }, + + + // Allow spaces as padding + b' ' | b'\n' | b'\r' | b'\t' => {}, + // Everything else is illegal + _ => { + return Err(PatError::UnknownChar); + }, + } + // Converted from str originally, should be safe + *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) }; + } + // 
Check balanced stack operators + if depth != 0 { + return Err(PatError::StackError); + } + // Check if sub patterns are balanced + if subs.len() != 0 { + return Err(PatError::SubPattern); + } + + // Remove redundant atoms at the end + fn is_redundant(atom: &Atom) -> bool { + match atom { + | Atom::Skip(_) + | Atom::Rangext(_) + | Atom::Pop + | Atom::Many(_) => true, + _ => false, + } + } + while result.last().map(is_redundant).unwrap_or(false) { + result.pop(); + } + + Ok(()) +} \ No newline at end of file diff --git a/sub/xpat/Cargo.toml b/sub/xpat/Cargo.toml new file mode 100644 index 0000000..31457b4 --- /dev/null +++ b/sub/xpat/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "sub_xpat" +version = "0.1.0" +edition = "2021" + +[dependencies] +sub_core.workspace = true +sub_macros.workspace = true \ No newline at end of file diff --git a/sub/xpat/src/atoms.rs b/sub/xpat/src/atoms.rs new file mode 100644 index 0000000..a04b07b --- /dev/null +++ b/sub/xpat/src/atoms.rs @@ -0,0 +1,83 @@ +pub type Pattern<'l> = &'l[Atom]; + +/// Pattern atoms. +/// +/// The scanner will silently ignore nonsensical arguments. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Atom { + /// Matches a single byte. + Byte(u8), + /// Captures the cursor in the save array at the specified index. + Save(u8), + /// After a Pop later continue matching at the current cursor plus the argument. + Push(u8), + /// Pops the cursor from the stack and continues matching. + Pop, + /// Sets a mask to apply on next byte match. + Fuzzy(u8), + /// Skips a fixed number of bytes. + Skip(u8), + /// Rewinds the cursor a fixed number of bytes. + Back(u8), + /// Extends the push, skip, back and many range by `argument * 256`. + Rangext(u8), + /// Looks for the next pattern at most a certain number of bytes ahead. + Many(u8), + /// Follows a signed 1 byte jump. + /// + /// Reads the byte under the cursor, sign extends it, adds it plus 1 to the cursor and continues matching. 
+ Jump1, + /// Follows a signed 4 byte jump. + /// + /// Reads the dword under the cursor and adds it plus 4 to the cursor and continues matching. + Jump4, + /// Follows an absolute pointer. + /// + /// Reads the pointer under the cursor, translates it to an RVA, assigns it to the cursor and continues matching. + /// + /// Matching fails immediately when translation to an RVA fails. + Ptr, + /// Follows a position independent reference. + /// + /// Reads the dword under the cursor and adds it to the saved cursor for the given slot and continues matching. + Pir(u8), + /// Compares the cursor with the value in the given save slot and fails if they're not equal. + Check(u8), + /// Checks if the cursor is aligned to `(1 << value)`. + Aligned(u8), + /// Reads and sign-extends the byte under the cursor, writes to the given slot and advances the cursor by 1. + ReadI8(u8), + /// Reads and zero-extends the byte under the cursor, writes to the given slot and advances the cursor by 1. + ReadU8(u8), + /// Reads and sign-extends the word under the cursor, writes to the given slot and advances the cursor by 2. + ReadI16(u8), + /// Reads and zero-extends the word under the cursor, writes to the given slot and advances the cursor by 2. + ReadU16(u8), + /// Reads the dword under the cursor, writes to the given slot and advances the cursor by 4. + ReadI32(u8), + /// Reads the dword under the cursor, writes to the given slot and advances the cursor by 4. + ReadU32(u8), + /// Writes zero to the given save slot. + Zero(u8), + /// Sets a retry point when matching fails. + /// + /// When matching fails the cursor is restored and matching begins again skipping _N_ atoms. + Case(u8), + /// Continues matching after a case atom, skipping the next _N_ atoms. + Break(u8), + /// Null instruction, used to make the parser easier to write. 
+ Nop, +} + +impl Atom { + pub fn save_len(pat: &[Atom]) -> usize { + pat.iter().filter_map(|&atom| { + match atom { + Atom::Save(slot) | Atom::Pir(slot) | Atom::Check(slot) | Atom::Zero(slot) | + Atom::ReadI8(slot) | Atom::ReadI16(slot) | Atom::ReadI32(slot) | + Atom::ReadU8(slot) | Atom::ReadU16(slot)| Atom::ReadU32(slot) => Some(slot as usize + 1), + _ => None, + } + }).max().unwrap_or(0) + } +} \ No newline at end of file diff --git a/sub/xpat/src/lib.rs b/sub/xpat/src/lib.rs new file mode 100644 index 0000000..a909ce9 --- /dev/null +++ b/sub/xpat/src/lib.rs @@ -0,0 +1,28 @@ +#![no_std] #![allow(unused)] + +pub mod atoms { + include!("atoms.rs"); +} + +pub mod scannable; +pub mod scanner; + + +// +// Export Preludes: +// + +pub mod prelude { + pub use sub_macros::pattern; + pub use crate::atoms::Pattern; + pub use crate::scanner::Scanner; +} + +pub mod public { + pub use crate::atoms::Atom; + pub use crate::scannable::Scannable; + pub use crate::scanner::{ + exec, scan_for_aob, make_aob + }; +} + diff --git a/sub/xpat/src/scannable.rs b/sub/xpat/src/scannable.rs new file mode 100644 index 0000000..911b1c3 --- /dev/null +++ b/sub/xpat/src/scannable.rs @@ -0,0 +1,34 @@ +use core::ops::Range; + +pub trait Scannable { + /// get total bounds + fn range(&self) -> Range; + + /// gets the chunk at the supplied address if there is one + fn chunk_at(&self, address: usize) -> Option<&[u8]>; + + /// given an address will return the next chunk, None if there are no more hcunks + fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>; + +} + + +impl Scannable for [u8] { + fn range(&self) -> Range { 0..self.len() } + fn chunk_at(&self, address: usize) -> Option<&[u8]> { + self.get(address..) 
+ } + fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None } +} + +/// In case you want to scan with a specific address +impl Scannable for (usize, &[u8]) { + fn range(&self) -> Range { self.0..(self.0 + self.1.len()) } + fn chunk_at(&self, address: usize) -> Option<&[u8]> { + match address.overflowing_sub(self.0) { + (address, false) => self.1.get(address..), + (_, true) => None, + } + } + fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None } +} \ No newline at end of file diff --git a/sub/xpat/src/scanner.rs b/sub/xpat/src/scanner.rs new file mode 100644 index 0000000..18c7c67 --- /dev/null +++ b/sub/xpat/src/scanner.rs @@ -0,0 +1,418 @@ +use core::ops::{Range, RangeBounds, Bound}; +use crate::atoms::{Pattern, Atom}; +use crate::scannable::Scannable; +use sub_core::{pod::Pod}; + +const SKIP_VA: u32 = size_of::() as u32; + +pub struct Scanner<'a, S: Scannable + ?Sized> { + /// the binary to be scanned + bin: &'a S, + + /// the pattern + pat: Pattern<'a>, + + /// the range to search for the pattern in + range: Range, + + /// the current cursor position + cursor: usize, +} + +impl<'a, S: Scannable + ?Sized> Scanner<'a, S> { + + pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds) -> Self { + let range = limit_range(bin, r); + let cursor = range.start; + Self { bin, pat, range, cursor } + } + + pub fn next(&mut self, saves: &mut [usize]) -> bool { + let mut aob = <[u8; 0x10] as Pod>::uninit(); + let aob = make_aob(self.pat, &mut aob); + + match !aob.is_empty() { + true => { + let upper_limit = self.range.end; + while let Some(address) = scan_for_aob(self.bin, self.cursor..upper_limit, aob) { + self.cursor = address + 1; + if exec(self.bin, address, self.pat, saves, self.range.clone()) { + return true; + } + } + false + } + false => { + while self.range.contains(&self.cursor) { + let current_cursor = self.cursor; + self.cursor += 1; + if exec(self.bin, current_cursor, self.pat, saves, self.range.clone()) { + return true; + 
} + } + false + } + } + } +} + +#[inline(always)] +pub fn exec( + bin: &Binary, + address: usize, + pattern: Pattern, + saves: &mut [usize], + range: Range, +) -> bool { + + let mut cursor = address; + let mut pc = 0; + + // pattern state + let mut mask = 0xff; + let mut ext_range = 0u32; + + #[inline(always)] + fn read(bin: &B, address: usize) -> Option { + let slice = bin.chunk_at(address)?; + if slice.len() >= size_of::() { + return Some(unsafe { (slice.as_ptr() as *const T).read_unaligned() }); + } + None + } + + while let Some(atom) = pattern.get(pc).cloned() { + pc += 1; + match atom { + + // Compare bytes + Atom::Byte(pat_byte) => { + let Some(byte) = read::<_, u8>(bin, cursor) else { return false; }; + if byte & mask != pat_byte & mask { return false; } + cursor += 1; + mask = 0xFF; + } + + // save the current address + Atom::Save(slot_idx) => { + if let Some(slot) = saves.get_mut(slot_idx as usize) { + *slot = cursor; + } + } + + Atom::Push(skip) => { + let skip = ext_range + skip as u32; + let skip = if skip == 0 { SKIP_VA } else { skip }; + + // start running the pattern from pc... + if !exec(bin, cursor, &pattern[pc..], saves, range.clone()) { + return false; + } + cursor = cursor.wrapping_add(skip as usize); + mask = 0xff; + ext_range = 0; + + // Iterate forward in the pattern looking for the POP for this push... 
+ let mut counter = 1; + while counter != 0 { + // keep incrementing the pc so the next atom will be the one after pop + match pattern.get(pc) { + Some(Atom::Push(_)) => counter += 1, + Some(Atom::Pop) => counter -= 1, + None => return true, + _ => (/**/) + } + pc += 1; + } + } + + Atom::Pop => { + return true; + } + + Atom::Fuzzy(pat_mask) => { + mask = pat_mask; + } + + Atom::Skip(skip) => { + let skip = ext_range + skip as u32; + let skip = if skip == 0 { SKIP_VA } else { skip }; + cursor = cursor.wrapping_add(skip as usize); + ext_range = 0; + } + + Atom::Back(back) => { + let rewind = ext_range + back as u32; + let rewind = if rewind == 0 { SKIP_VA } else { rewind }; + cursor = cursor.wrapping_sub(rewind as usize); + ext_range = 0; + } + + Atom::Rangext(ext) => { + ext_range = ext as u32 * 256; + } + + Atom::Many(limit) => { + let limit = ext_range + limit as u32; + return exec_many(bin, cursor, &pattern[pc..], saves, range, limit); + } + + Atom::Jump1 => { + let Some(sbyte) = read::<_, i8>(bin, cursor) else { return false }; + cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1); + } + + Atom::Jump4 => { + let Some(sdword) = read::<_, i32>(bin, cursor) else { return false }; + cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4); + } + + Atom::Ptr => { + let Some(sptr) = read::<_, usize>(bin, cursor) else { return false }; + cursor = sptr; + } + + Atom::Pir(slot) => { + let Some(sdword) = read::<_, i32>(bin, cursor) else { return false }; + let base = saves.get(slot as usize).cloned().unwrap_or(cursor); + cursor = base.wrapping_add(sdword as usize); + } + + Atom::Check(slot) => { + if let Some(&rva) = saves.get(slot as usize) { + if rva != cursor { return false; } + } + } + + Atom::Aligned(align) => { + if cursor & ((1 << align) - 1) != 0 { + return false; + } + } + + Atom::ReadU8(slot) => { + let Some(value) = read::<_, u8>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + 
Atom::ReadI8(slot) => { + let Some(value) = read::<_, i8>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + Atom::ReadU16(slot) => { + let Some(value) = read::<_, u16>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + Atom::ReadI16(slot) => { + let Some(value) = read::<_, i16>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + Atom::ReadU32(slot) => { + let Some(value) = read::<_, u32>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + Atom::ReadI32(slot) => { + let Some(value) = read::<_, i32>(bin, cursor) else { return false }; + if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } + } + Atom::Zero(slot) => { + if let Some(slot) = saves.get_mut(slot as usize) { + *slot = 0; + } + } + Atom::Case(next) => { + if exec(bin, cursor, pattern, saves, range.clone()) { + // same as Push/Pop except we add the next from the break to the pc. 
+ let mut counter = 1; + loop { + pc += 1; + match pattern.get(pc) { + Some(Atom::Case(_)) => counter += 1, + Some(Atom::Break(next)) => { + counter -= 1; + if counter == 0 { + pc += *next as usize + } + } + None => return true, + _ => (/**/) + } + } + } else { + // if the case fails go to the location defined by next + pc += next as usize; + } + } + Atom::Break(_next) => { + return true; + } + Atom::Nop => {} + } + } + true +} + + +#[inline(always)] +pub fn exec_many( + bin: &Binary, + address: usize, + pattern: Pattern, + saves: &mut [usize], + range: Range, + limit: u32, +) -> bool { + let mut aob = <[u8; 0x10] as Pod>::uninit(); + let aob = make_aob(pattern, &mut aob); + + let Some(chunk) = bin.chunk_at(address) else { + // pattern fails before we even try (out of bounds) + return false; + }; + + match !aob.is_empty() { + true => { + let upper_limit = address + limit as usize; + let mut cursor = address; + while let Some(address) = scan_for_aob(bin, cursor..upper_limit, aob) { + cursor = address; + if exec(bin, cursor, pattern, saves, range.clone()) { + return true; + } + cursor += 1; + } + false + } + false => { + // try to reduce the limit just in-case we can squeeze some perf out of it + for i in 0..(limit as usize).min(chunk.len()) { + if exec(bin, address + i, pattern, saves, range.clone()) { + return true; + } + } + false + } + } +} + + +#[inline(always)] +pub fn scan_for_aob( + bin: &Binary, + range: Range, + aob: &[u8], +) -> Option { + let mut address = range.start; + let upper_bounds = range.end; + + + while address < upper_bounds { + + // get the current chunk for the given address + let chunk = match bin.chunk_at(address) { + Some(chunk) => chunk, + + // the address is out of bounds, try to shift the address so its back in b ounds + None => match bin.next_chunk(address) { + + // the next chunk is in bounds so we will just correct the address and use that chunk instead + Some((naddr, nchunk)) if naddr < upper_bounds => { + address = naddr; + nchunk + 
} + + // no hope, give up + _ => return None, + } + }; + + // try to find the aob in the current chunk + if let Some(offset) = chunk.windows(aob.len()) + .take(upper_bounds.saturating_sub(address)).position(|c| c == aob) { + // we got a hit, return it + return Some(address + offset) + } + + + // the AOB was not found in the current chunk, now check if its contiguous between chunks: + if let Some((naddr, nchunk)) = bin.next_chunk(address) { + + // next chunk is out of bounds, give up + if naddr - aob.len() > upper_bounds { return None } + + // if chunks are contiguous and the aob is greater than one byte, + // check if the aob is on a chunk border + if address + chunk.len() == naddr && aob.len() > 1 { + // check if the aob is between two chunks :) + for i in 1..aob.len()-1 { + let (p1, p2) = aob.split_at(i); + if chunk.ends_with(p1) && nchunk.starts_with(p2) { + // aob was found between two chunks + // return this address + return Some(address + chunk.len() - i) + } + } + } + + // start scanning the next chunk + let naddr = naddr - aob.len(); + debug_assert!(naddr > address); + address = naddr; + } else { + return None + } + } + None + +} + + +/// Limits a selected range into the range of the binary... +fn limit_range( + bin: &Binary, + range: impl RangeBounds, +) -> Range { + let bin_range = bin.range(); + let start = match range.start_bound() { + Bound::Included(v) => bin_range.start.max(*v), + Bound::Excluded(v) => bin_range.start.max(v.saturating_add(1)), + Bound::Unbounded => bin_range.start, + }; + let end = match range.end_bound() { + Bound::Included(v) => bin_range.end.min(v.saturating_add(1)), + Bound::Excluded(v) => bin_range.end.min(*v), + Bound::Unbounded => bin_range.end + }; + start..end +} + +/// builds an array of bytes from the start of the pattern. 
+///
+/// Copies the leading `Byte` atoms of `pattern` into `buffer`, looking past `Save` and `Zero`
+/// atoms, and stops at the first atom of any other kind or when `buffer` is full. Returns the
+/// filled prefix of `buffer`, which is empty when the pattern has no literal prefix.
+pub fn make_aob<'b>(pattern: &[Atom], buffer: &'b mut [u8]) -> &'b [u8] {
+    let mut len = 0;
+    for atom in pattern {
+        match atom {
+            // These atoms neither consume input nor move the cursor, look past them.
+            Atom::Zero(_) | Atom::Save(_) => {}
+            Atom::Byte(byte) => {
+                // Stop instead of writing out of bounds; this also covers an empty buffer.
+                let Some(slot) = buffer.get_mut(len) else { break };
+                *slot = *byte;
+                len += 1;
+            }
+            _ => break,
+        }
+    }
+    &buffer[..len]
+}
diff --git a/x/Cargo.toml b/x/Cargo.toml
index 383d3d8..b6fd9dd 100644
--- a/x/Cargo.toml
+++ b/x/Cargo.toml
@@ -10,7 +10,7 @@ macros = ["sub_macros"]
 libm = ["sub_libm"]
 pe = ["sub_pe"]
 
-
+xpat = ["core", "sub_xpat", "macros"]
 winuser = ["sub_winu", "pe", "sub_pe/windows"]
 
 [dependencies]
@@ -18,4 +18,5 @@ sub_core = { workspace = true, optional = true }
 sub_libm = { workspace = true, optional = true}
 sub_pe = { workspace = true, optional = true }
 sub_winu = { workspace = true, optional = true }
-sub_macros = { workspace = true, optional = true }
\ No newline at end of file
+sub_macros = { workspace = true, optional = true }
+sub_xpat = { workspace = true, optional = true }
\ No newline at end of file
diff --git a/x/src/lib.rs b/x/src/lib.rs
index 935d54c..fbf5c87 100644
--- a/x/src/lib.rs
+++ b/x/src/lib.rs
@@ -18,6 +18,7 @@ import!(sub_core, core, "core");
 import!(sub_libm, libm, "libm");
 import!(sub_pe, pe, "pe");
 import!(sub_winu, win, "winuser");
+import!(sub_xpat, xpat, "xpat");
 
 /// the macro crate is a proc macro, so it is a bit different.
 #[cfg(feature = "macros")]
diff --git a/x/tests/test_xpat.rs b/x/tests/test_xpat.rs
new file mode 100644
index 0000000..a875cfd
--- /dev/null
+++ b/x/tests/test_xpat.rs
@@ -0,0 +1,17 @@
+
+
+
+#[test]
+pub fn test_pattern() {
+
+    let pattern = x::pattern!("E8 [0-4] BB ");
+    let buffer: &[u8] = &[ 0xAA, 0xE8, 0xBB, 0xE8, 0x00, 0xBB, ];
+
+    let mut scanner = x::Scanner::new(buffer, pattern, ..);
+    let mut saves = [0usize;8];
+
+    assert!(scanner.next(&mut saves));
+    assert_eq!(saves[0], 1);
+    assert!(scanner.next(&mut saves));
+    assert_eq!(saves[0], 3);
+}