xpat
This commit is contained in:
8
sub/xpat/Cargo.toml
Normal file
8
sub/xpat/Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "sub_xpat"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
sub_core.workspace = true
|
||||
sub_macros.workspace = true
|
||||
83
sub/xpat/src/atoms.rs
Normal file
83
sub/xpat/src/atoms.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
/// A compiled pattern: a borrowed sequence of [`Atom`]s executed in order.
pub type Pattern<'l> = &'l [Atom];

/// One instruction of a compiled pattern.
///
/// Arguments that make no sense (for example a save slot the caller did not
/// provide) are silently ignored by the scanner instead of being reported.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Atom {
    /// Compares the byte under the cursor with the argument and steps past it.
    Byte(u8),

    /// Stores the current cursor in the save slot with the given index.
    Save(u8),

    /// Starts a detour: once the matching `Pop` is reached, matching resumes
    /// at the cursor as it is now plus the argument.
    ///
    /// A total distance of zero is treated by the scanner as the size of a
    /// pointer.
    Push(u8),

    /// Ends the detour opened by the matching `Push` and resumes at the
    /// pushed cursor.
    Pop,

    /// Sets the bit mask used by the next byte comparison.
    Fuzzy(u8),

    /// Moves the cursor forward by a fixed number of bytes.
    ///
    /// A total distance of zero is treated by the scanner as the size of a
    /// pointer.
    Skip(u8),

    /// Moves the cursor backward by a fixed number of bytes.
    ///
    /// A total distance of zero is treated by the scanner as the size of a
    /// pointer.
    Back(u8),

    /// Adds `argument * 256` to the range of the following push, skip, back
    /// or many atom.
    Rangext(u8),

    /// Searches for the remainder of the pattern at most the given number of
    /// bytes ahead of the cursor.
    Many(u8),

    /// Follows a signed 1 byte jump.
    ///
    /// The byte under the cursor is sign extended and added, plus 1, to the
    /// cursor; matching continues there.
    Jump1,

    /// Follows a signed 4 byte jump.
    ///
    /// The dword under the cursor is added, plus 4, to the cursor; matching
    /// continues there.
    Jump4,

    /// Follows an absolute pointer.
    ///
    /// The pointer-sized value under the cursor becomes the new cursor and
    /// matching continues there. Matching fails if the value cannot be read.
    Ptr,

    /// Follows a position independent reference.
    ///
    /// The dword under the cursor is added to the cursor saved in the given
    /// slot; matching continues at the result.
    Pir(u8),

    /// Fails the match unless the cursor equals the value in the given save
    /// slot.
    Check(u8),

    /// Fails the match unless the cursor is aligned to `(1 << value)`.
    Aligned(u8),

    /// Reads the byte under the cursor as a signed value, sign-extends it into
    /// the given slot and advances the cursor by 1.
    ReadI8(u8),

    /// Reads the byte under the cursor as an unsigned value, zero-extends it
    /// into the given slot and advances the cursor by 1.
    ReadU8(u8),

    /// Reads the word under the cursor as a signed value, sign-extends it into
    /// the given slot and advances the cursor by 2.
    ReadI16(u8),

    /// Reads the word under the cursor as an unsigned value, zero-extends it
    /// into the given slot and advances the cursor by 2.
    ReadU16(u8),

    /// Reads the dword under the cursor as a signed value, writes it to the
    /// given slot and advances the cursor by 4.
    ReadI32(u8),

    /// Reads the dword under the cursor as an unsigned value, writes it to the
    /// given slot and advances the cursor by 4.
    ReadU32(u8),

    /// Writes zero to the given save slot.
    Zero(u8),

    /// Sets a retry point for when matching fails.
    ///
    /// If the atoms that follow fail to match, the cursor is restored and
    /// matching starts over after skipping _N_ atoms.
    Case(u8),

    /// Continues matching after a case atom, skipping the next _N_ atoms.
    Break(u8),

    /// Does nothing; exists to make the pattern parser easier to write.
    Nop,
}
|
||||
|
||||
impl Atom {
|
||||
pub fn save_len(pat: &[Atom]) -> usize {
|
||||
pat.iter().filter_map(|&atom| {
|
||||
match atom {
|
||||
Atom::Save(slot) | Atom::Pir(slot) | Atom::Check(slot) | Atom::Zero(slot) |
|
||||
Atom::ReadI8(slot) | Atom::ReadI16(slot) | Atom::ReadI32(slot) |
|
||||
Atom::ReadU8(slot) | Atom::ReadU16(slot)| Atom::ReadU32(slot) => Some(slot as usize + 1),
|
||||
_ => None,
|
||||
}
|
||||
}).max().unwrap_or(0)
|
||||
}
|
||||
}
|
||||
28
sub/xpat/src/lib.rs
Normal file
28
sub/xpat/src/lib.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
#![no_std] #![allow(unused)]
|
||||
|
||||
pub mod atoms {
|
||||
include!("atoms.rs");
|
||||
}
|
||||
|
||||
pub mod scannable;
|
||||
pub mod scanner;
|
||||
|
||||
|
||||
//
|
||||
// Export Preludes:
|
||||
//
|
||||
|
||||
pub mod prelude {
|
||||
pub use sub_macros::pattern;
|
||||
pub use crate::atoms::Pattern;
|
||||
pub use crate::scanner::Scanner;
|
||||
}
|
||||
|
||||
pub mod public {
|
||||
pub use crate::atoms::Atom;
|
||||
pub use crate::scannable::Scannable;
|
||||
pub use crate::scanner::{
|
||||
exec, scan_for_aob, make_aob
|
||||
};
|
||||
}
|
||||
|
||||
34
sub/xpat/src/scannable.rs
Normal file
34
sub/xpat/src/scannable.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use core::ops::Range;
|
||||
|
||||
/// A source of bytes, addressed by `usize`, that can be searched for patterns.
///
/// The bytes may be split into several chunks; `chunk_at` and `next_chunk`
/// let a caller walk from one chunk to the next.
pub trait Scannable {
    /// Returns the total address bounds of this source.
    fn range(&self) -> Range<usize>;

    /// Returns the chunk at `address`, or `None` if there is no chunk there.
    fn chunk_at(&self, address: usize) -> Option<&[u8]>;

    /// Returns the address and bytes of the chunk that follows `address`, or
    /// `None` if there are no more chunks.
    fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>;
}
|
||||
|
||||
|
||||
impl Scannable for [u8] {
|
||||
fn range(&self) -> Range<usize> { 0..self.len() }
|
||||
fn chunk_at(&self, address: usize) -> Option<&[u8]> {
|
||||
self.get(address..)
|
||||
}
|
||||
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None }
|
||||
}
|
||||
|
||||
/// In case you want to scan with a specific address
|
||||
impl Scannable for (usize, &[u8]) {
|
||||
fn range(&self) -> Range<usize> { self.0..(self.0 + self.1.len()) }
|
||||
fn chunk_at(&self, address: usize) -> Option<&[u8]> {
|
||||
match address.overflowing_sub(self.0) {
|
||||
(address, false) => self.1.get(address..),
|
||||
(_, true) => None,
|
||||
}
|
||||
}
|
||||
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None }
|
||||
}
|
||||
418
sub/xpat/src/scanner.rs
Normal file
418
sub/xpat/src/scanner.rs
Normal file
@@ -0,0 +1,418 @@
|
||||
use core::ops::{Range, RangeBounds, Bound};
|
||||
use crate::atoms::{Pattern, Atom};
|
||||
use crate::scannable::Scannable;
|
||||
use sub_core::{pod::Pod};
|
||||
|
||||
const SKIP_VA: u32 = size_of::<usize>() as u32;
|
||||
|
||||
pub struct Scanner<'a, S: Scannable + ?Sized> {
|
||||
/// the binary to be scanned
|
||||
bin: &'a S,
|
||||
|
||||
/// the pattern
|
||||
pat: Pattern<'a>,
|
||||
|
||||
/// the range to search for the pattern in
|
||||
range: Range<usize>,
|
||||
|
||||
/// the current cursor position
|
||||
cursor: usize,
|
||||
}
|
||||
|
||||
impl<'a, S: Scannable + ?Sized> Scanner<'a, S> {
|
||||
|
||||
pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds<usize>) -> Self {
|
||||
let range = limit_range(bin, r);
|
||||
let cursor = range.start;
|
||||
Self { bin, pat, range, cursor }
|
||||
}
|
||||
|
||||
pub fn next(&mut self, saves: &mut [usize]) -> bool {
|
||||
let mut aob = <[u8; 0x10] as Pod>::uninit();
|
||||
let aob = make_aob(self.pat, &mut aob);
|
||||
|
||||
match !aob.is_empty() {
|
||||
true => {
|
||||
let upper_limit = self.range.end;
|
||||
while let Some(address) = scan_for_aob(self.bin, self.cursor..upper_limit, aob) {
|
||||
self.cursor = address + 1;
|
||||
if exec(self.bin, address, self.pat, saves, self.range.clone()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
false => {
|
||||
while self.range.contains(&self.cursor) {
|
||||
let current_cursor = self.cursor;
|
||||
self.cursor += 1;
|
||||
if exec(self.bin, current_cursor, self.pat, saves, self.range.clone()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn exec<Binary: Scannable + ?Sized>(
|
||||
bin: &Binary,
|
||||
address: usize,
|
||||
pattern: Pattern,
|
||||
saves: &mut [usize],
|
||||
range: Range<usize>,
|
||||
) -> bool {
|
||||
|
||||
let mut cursor = address;
|
||||
let mut pc = 0;
|
||||
|
||||
// pattern state
|
||||
let mut mask = 0xff;
|
||||
let mut ext_range = 0u32;
|
||||
|
||||
#[inline(always)]
|
||||
fn read<B: Scannable + ?Sized, T: Sized + Copy>(bin: &B, address: usize) -> Option<T> {
|
||||
let slice = bin.chunk_at(address)?;
|
||||
if slice.len() >= size_of::<T>() {
|
||||
return Some(unsafe { (slice.as_ptr() as *const T).read_unaligned() });
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
while let Some(atom) = pattern.get(pc).cloned() {
|
||||
pc += 1;
|
||||
match atom {
|
||||
|
||||
// Compare bytes
|
||||
Atom::Byte(pat_byte) => {
|
||||
let Some(byte) = read::<_, u8>(bin, cursor) else { return false; };
|
||||
if byte & mask != pat_byte & mask { return false; }
|
||||
cursor += 1;
|
||||
mask = 0xFF;
|
||||
}
|
||||
|
||||
// save the current address
|
||||
Atom::Save(slot_idx) => {
|
||||
if let Some(slot) = saves.get_mut(slot_idx as usize) {
|
||||
*slot = cursor;
|
||||
}
|
||||
}
|
||||
|
||||
Atom::Push(skip) => {
|
||||
let skip = ext_range + skip as u32;
|
||||
let skip = if skip == 0 { SKIP_VA } else { skip };
|
||||
|
||||
// start running the pattern from pc...
|
||||
if !exec(bin, cursor, &pattern[pc..], saves, range.clone()) {
|
||||
return false;
|
||||
}
|
||||
cursor = cursor.wrapping_add(skip as usize);
|
||||
mask = 0xff;
|
||||
ext_range = 0;
|
||||
|
||||
// Iterate forward in the pattern looking for the POP for this push...
|
||||
let mut counter = 1;
|
||||
while counter != 0 {
|
||||
// keep incrementing the pc so the next atom will be the one after pop
|
||||
match pattern.get(pc) {
|
||||
Some(Atom::Push(_)) => counter += 1,
|
||||
Some(Atom::Pop) => counter -= 1,
|
||||
None => return true,
|
||||
_ => (/**/)
|
||||
}
|
||||
pc += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Atom::Pop => {
|
||||
return true;
|
||||
}
|
||||
|
||||
Atom::Fuzzy(pat_mask) => {
|
||||
mask = pat_mask;
|
||||
}
|
||||
|
||||
Atom::Skip(skip) => {
|
||||
let skip = ext_range + skip as u32;
|
||||
let skip = if skip == 0 { SKIP_VA } else { skip };
|
||||
cursor = cursor.wrapping_add(skip as usize);
|
||||
ext_range = 0;
|
||||
}
|
||||
|
||||
Atom::Back(back) => {
|
||||
let rewind = ext_range + back as u32;
|
||||
let rewind = if rewind == 0 { SKIP_VA } else { rewind };
|
||||
cursor = cursor.wrapping_sub(rewind as usize);
|
||||
ext_range = 0;
|
||||
}
|
||||
|
||||
Atom::Rangext(ext) => {
|
||||
ext_range = ext as u32 * 256;
|
||||
}
|
||||
|
||||
Atom::Many(limit) => {
|
||||
let limit = ext_range + limit as u32;
|
||||
return exec_many(bin, cursor, &pattern[pc..], saves, range, limit);
|
||||
}
|
||||
|
||||
Atom::Jump1 => {
|
||||
let Some(sbyte) = read::<_, i8>(bin, cursor) else { return false };
|
||||
cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1);
|
||||
}
|
||||
|
||||
Atom::Jump4 => {
|
||||
let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
|
||||
cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4);
|
||||
}
|
||||
|
||||
Atom::Ptr => {
|
||||
let Some(sptr) = read::<_, usize>(bin, cursor) else { return false };
|
||||
cursor = sptr;
|
||||
}
|
||||
|
||||
Atom::Pir(slot) => {
|
||||
let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
|
||||
let base = saves.get(slot as usize).cloned().unwrap_or(cursor);
|
||||
cursor = base.wrapping_add(sdword as usize);
|
||||
}
|
||||
|
||||
Atom::Check(slot) => {
|
||||
if let Some(&rva) = saves.get(slot as usize) {
|
||||
if rva != cursor { return false; }
|
||||
}
|
||||
}
|
||||
|
||||
Atom::Aligned(align) => {
|
||||
if cursor & ((1 << align) - 1) != 0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Atom::ReadU8(slot) => {
|
||||
let Some(value) = read::<_, u8>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::ReadI8(slot) => {
|
||||
let Some(value) = read::<_, i8>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::ReadU16(slot) => {
|
||||
let Some(value) = read::<_, u16>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::ReadI16(slot) => {
|
||||
let Some(value) = read::<_, i16>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::ReadU32(slot) => {
|
||||
let Some(value) = read::<_, u32>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::ReadI32(slot) => {
|
||||
let Some(value) = read::<_, i32>(bin, cursor) else { return false };
|
||||
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
|
||||
}
|
||||
Atom::Zero(slot) => {
|
||||
if let Some(slot) = saves.get_mut(slot as usize) {
|
||||
*slot = 0;
|
||||
}
|
||||
}
|
||||
Atom::Case(next) => {
|
||||
if exec(bin, cursor, pattern, saves, range.clone()) {
|
||||
// same as Push/Pop except we add the next from the break to the pc.
|
||||
let mut counter = 1;
|
||||
loop {
|
||||
pc += 1;
|
||||
match pattern.get(pc) {
|
||||
Some(Atom::Case(_)) => counter += 1,
|
||||
Some(Atom::Break(next)) => {
|
||||
counter -= 1;
|
||||
if counter == 0 {
|
||||
pc += *next as usize
|
||||
}
|
||||
}
|
||||
None => return true,
|
||||
_ => (/**/)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// if the case fails go to the location defined by next
|
||||
pc += next as usize;
|
||||
}
|
||||
}
|
||||
Atom::Break(_next) => {
|
||||
return true;
|
||||
}
|
||||
Atom::Nop => {}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
pub fn exec_many<Binary: Scannable + ?Sized >(
|
||||
bin: &Binary,
|
||||
address: usize,
|
||||
pattern: Pattern,
|
||||
saves: &mut [usize],
|
||||
range: Range<usize>,
|
||||
limit: u32,
|
||||
) -> bool {
|
||||
let mut aob = <[u8; 0x10] as Pod>::uninit();
|
||||
let aob = make_aob(pattern, &mut aob);
|
||||
|
||||
let Some(chunk) = bin.chunk_at(address) else {
|
||||
// pattern fails before we even try (out of bounds)
|
||||
return false;
|
||||
};
|
||||
|
||||
match !aob.is_empty() {
|
||||
true => {
|
||||
let upper_limit = address + limit as usize;
|
||||
let mut cursor = address;
|
||||
while let Some(address) = scan_for_aob(bin, cursor..upper_limit, aob) {
|
||||
cursor = address;
|
||||
if exec(bin, cursor, pattern, saves, range.clone()) {
|
||||
return true;
|
||||
}
|
||||
cursor += 1;
|
||||
}
|
||||
false
|
||||
}
|
||||
false => {
|
||||
// try to reduce the limit just in-case we can squeeze some perf out of it
|
||||
for i in 0..(limit as usize).min(chunk.len()) {
|
||||
if exec(bin, address + i, pattern, saves, range.clone()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
pub fn scan_for_aob<Binary: Scannable + ?Sized>(
|
||||
bin: &Binary,
|
||||
range: Range<usize>,
|
||||
aob: &[u8],
|
||||
) -> Option<usize> {
|
||||
let mut address = range.start;
|
||||
let upper_bounds = range.end;
|
||||
|
||||
|
||||
while address < upper_bounds {
|
||||
|
||||
// get the current chunk for the given address
|
||||
let chunk = match bin.chunk_at(address) {
|
||||
Some(chunk) => chunk,
|
||||
|
||||
// the address is out of bounds, try to shift the address so its back in b ounds
|
||||
None => match bin.next_chunk(address) {
|
||||
|
||||
// the next chunk is in bounds so we will just correct the address and use that chunk instead
|
||||
Some((naddr, nchunk)) if naddr < upper_bounds => {
|
||||
address = naddr;
|
||||
nchunk
|
||||
}
|
||||
|
||||
// no hope, give up
|
||||
_ => return None,
|
||||
}
|
||||
};
|
||||
|
||||
// try to find the aob in the current chunk
|
||||
if let Some(offset) = chunk.windows(aob.len())
|
||||
.take(upper_bounds.saturating_sub(address)).position(|c| c == aob) {
|
||||
// we got a hit, return it
|
||||
return Some(address + offset)
|
||||
}
|
||||
|
||||
|
||||
// the AOB was not found in the current chunk, now check if its contiguous between chunks:
|
||||
if let Some((naddr, nchunk)) = bin.next_chunk(address) {
|
||||
|
||||
// next chunk is out of bounds, give up
|
||||
if naddr - aob.len() > upper_bounds { return None }
|
||||
|
||||
// if chunks are contiguous and the aob is greater than one byte,
|
||||
// check if the aob is on a chunk border
|
||||
if address + chunk.len() == naddr && aob.len() > 1 {
|
||||
// check if the aob is between two chunks :)
|
||||
for i in 1..aob.len()-1 {
|
||||
let (p1, p2) = aob.split_at(i);
|
||||
if chunk.ends_with(p1) && nchunk.starts_with(p2) {
|
||||
// aob was found between two chunks
|
||||
// return this address
|
||||
return Some(address + chunk.len() - i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// start scanning the next chunk
|
||||
let naddr = naddr - aob.len();
|
||||
debug_assert!(naddr > address);
|
||||
address = naddr;
|
||||
} else {
|
||||
return None
|
||||
}
|
||||
}
|
||||
None
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// Limits a selected range into the range of the binary...
|
||||
fn limit_range<Binary: Scannable + ?Sized>(
|
||||
bin: &Binary,
|
||||
range: impl RangeBounds<usize>,
|
||||
) -> Range<usize> {
|
||||
let bin_range = bin.range();
|
||||
let start = match range.start_bound() {
|
||||
Bound::Included(v) => bin_range.start.max(*v),
|
||||
Bound::Excluded(v) => bin_range.start.max(v.saturating_add(1)),
|
||||
Bound::Unbounded => bin_range.start,
|
||||
};
|
||||
let end = match range.end_bound() {
|
||||
Bound::Included(v) => bin_range.end.min(v.saturating_add(1)),
|
||||
Bound::Excluded(v) => bin_range.end.min(*v),
|
||||
Bound::Unbounded => bin_range.end
|
||||
};
|
||||
start..end
|
||||
}
|
||||
|
||||
/// builds an array of bytes from the start of the pattern.
|
||||
pub fn make_aob<'b>(pattern: &[Atom], buffer: &'b mut [u8]) -> &'b [u8] {
|
||||
let mut i = 0;
|
||||
for atoms in pattern {
|
||||
match atoms {
|
||||
Atom::Zero(_) => (/* do nothing */),
|
||||
Atom::Save(_) => (/* do nothing */),
|
||||
Atom::Byte(b) => {
|
||||
buffer[i] = *b;
|
||||
i += 1;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
if i >= buffer.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
&buffer[..i]
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user