xpat

2024-11-14 14:39:09 -05:00
parent 427eb56a50
commit 020bd3a5db
11 changed files with 1131 additions and 3 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,8 @@ members = [
    "sub/libm",
    "sub/pe",
    "sub/winu",
-    "sub/_macros"
+    "sub/_macros",
+    "sub/xpat",
 ]

 [workspace.dependencies]
@@ -16,3 +17,4 @@ sub_libm = { path = "sub/libm" }
 sub_pe = { path = "sub/pe" }
 sub_winu = { path = "sub/winu" }
 sub_macros = { path = "sub/_macros" }
+sub_xpat = { path = "sub/xpat" }
--- a/sub/_macros/src/lib.rs
+++ b/sub/_macros/src/lib.rs
@@ -1,6 +1,12 @@
+#![allow(unused)]

 mod from_repr;
+mod patterns;

+/// Function-like procedural macro that parses a pattern string at compile time.
+///
+/// The token stream is forwarded unchanged to `patterns::proc_pattern`, which panics
+/// unless it receives exactly one string literal containing a valid pattern.
+#[proc_macro]
+pub fn pattern(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    patterns::proc_pattern(input)
+}

 #[proc_macro_derive(FromRepr)]
 pub fn derive_from_repr(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
--- a/sub/_macros/src/patterns.rs
+++ b/sub/_macros/src/patterns.rs
@@ -0,0 +1,530 @@
+use core::{cmp, fmt, mem, str};
+use proc_macro::{Literal, TokenStream, TokenTree};
+
+/// Compile time pattern parser backing the `pattern!` macro.
+///
+/// Expects exactly one string literal token, parses it with [`parse`] and expands to a
+/// block expression of the form `{ use x::xpat::Atom::*; &[..atoms..] as x::Pattern }`.
+/// The expansion names `x::xpat::Atom` and `x::Pattern` literally, so those paths must
+/// resolve at the call site.
+///
+/// ```ignore
+/// let pat = pattern!("pattern string");
+/// ```
+///
+/// # Panics
+///
+/// Panics when the input is not a single string literal or when the pattern
+/// string has invalid syntax.
+pub fn proc_pattern(input: TokenStream) -> TokenStream {
+    let tokens: Vec<TokenTree> = input.into_iter().collect();
+
+    // Only a lone literal token is accepted; anything else is rejected up front.
+    let [TokenTree::Literal(literal)] = tokens.as_slice() else {
+        panic!("expected a single string literal to parse");
+    };
+    let source = parse_str_literal(literal);
+
+    let atoms = parse(&source).unwrap_or_else(|err| panic!("invalid pattern syntax: {}", err));
+
+    // The atoms are rendered through their `Debug` output; the glob import makes the
+    // unqualified variant names resolve inside the generated block.
+    let expansion = format!("{{ use x::xpat::Atom::*; &{:?} as x::Pattern }}", atoms);
+    expansion.parse().unwrap()
+}
+
+/// Extracts the contents of a plain string literal token, resolving escape sequences.
+///
+/// Works on the textual form of the literal: it must start with `"` and the
+/// text up to the matching closing `"` is returned. The escapes `\\`, `\'`, `\"`, `\t`,
+/// `\r` and `\n` are translated; everything else is copied through verbatim.
+///
+/// # Panics
+///
+/// Panics when the text does not start with `"` (for example a raw string or a
+/// non-string literal), when it contains a `\u` or otherwise unknown escape, or when it
+/// ends before the closing quote.
+fn parse_str_literal(input: &Literal) -> String {
+    let source = input.to_string();
+    let mut chars = source.chars();
+    if chars.next() != Some('"') {
+        panic!("expected string literal starting with a `\"` and no extraneous whitespace");
+    }
+    let mut string = String::with_capacity(source.len());
+    loop {
+        let chr = match chars.next() {
+            Some('\\') => match chars.next() {
+                Some('\\') => '\\',
+                Some('\'') => '\'',
+                Some('\"') => '\"',
+                Some('t') => '\t',
+                Some('r') => '\r',
+                Some('n') => '\n',
+                Some('u') => panic!("unicode escape sequence not supported"),
+                Some(chr) => panic!("unknown escape sequence: {}", chr),
+                // Previously an empty panic message; say what actually went wrong.
+                None => panic!("unexpected end of string literal inside an escape sequence"),
+            },
+            Some('"') => break,
+            Some(chr) => chr,
+            None => panic!("unexpected end of string literal, missing `\"` terminator?"),
+        };
+        string.push(chr);
+    }
+    string
+}
+
+
+/// Special skip value to indicate to use platform pointer size instead.
+///
+/// The parser emits it as the argument of `Atom::Push` when a `*` pointer is followed
+/// by `{`, and it never grows a `Skip` atom holding this value when coalescing `?`.
+pub(crate) const PTR_SKIP: u8 = 0;
+
+/// Error returned when a pattern string fails to parse.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct ParsePatError {
+    /// What went wrong.
+    kind: PatError,
+    /// Byte offset into the pattern string of the token at which parsing stopped.
+    position: usize,
+}
+impl fmt::Display for ParsePatError {
+    /// Formats the error as `Syntax Error @<position>: <description>.`
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let Self { kind, position } = *self;
+        let description = kind.to_str();
+        write!(f, "Syntax Error @{}: {}.", position, description)
+    }
+}
+
+/// The specific kind of syntax error found while parsing a pattern string.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum PatError {
+    /// A hex digit (or `.` nibble wildcard) was not followed by a second nibble.
+    UnpairedHexDigit,
+    /// A character that is not part of the pattern syntax.
+    UnknownChar,
+    /// A bound inside `[...]` reached the maximum of 16384.
+    ManyOverflow,
+    /// The upper bound of `[lower-upper]` is not strictly greater than the lower bound.
+    ManyRange,
+    /// Malformed `[...]` expression.
+    ManyInvalid,
+    /// No save slots left.
+    SaveOverflow,
+    /// Unbalanced `{` / `}`.
+    StackError,
+    /// `{` did not directly follow a jump (`%`, `$` or `*`).
+    StackInvalid,
+    /// A `"` string was not terminated.
+    UnclosedQuote,
+    /// Missing or invalid operand after `@`.
+    AlignedOperand,
+    /// Missing or invalid operand after `=`.
+    CheckOperand,
+    /// Missing or invalid operand size after `i` or `u`.
+    ReadOperand,
+    /// `|` or `)` without an open `(`, or an unclosed `(`.
+    SubPattern,
+    /// A subpattern alternative is too long for its offset to fit in a byte.
+    SubOverflow,
+    /// Both nibbles of a byte were `.` wildcards.
+    DoubleNibble,
+}
+impl PatError {
+    /// Returns the human readable description used by `ParsePatError`'s `Display` output.
+    fn to_str(self) -> &'static str {
+        match self {
+            PatError::UnpairedHexDigit => "unpaired hex digit",
+            PatError::UnknownChar => "unknown character",
+            PatError::ManyOverflow => "many range exceeded",
+            PatError::ManyRange => "many bounds nonsensical",
+            PatError::ManyInvalid => "many invalid syntax",
+            PatError::SaveOverflow => "save store overflow",
+            PatError::StackError => "stack unbalanced",
+            PatError::StackInvalid => "stack must follow jump",
+            PatError::UnclosedQuote => "string missing end quote",
+            PatError::AlignedOperand => "aligned operand error",
+            // Was a copy of the `AlignedOperand` text, which misreported `=` errors.
+            PatError::CheckOperand => "check operand error",
+            PatError::ReadOperand => "read operand error",
+            PatError::SubPattern => "sub pattern error",
+            PatError::SubOverflow => "sub pattern too large",
+            PatError::DoubleNibble => "unpaired nibble wildcard",
+        }
+    }
+}
+
+//----------------------------------------------------------------
+
+include!("../../xpat/src/atoms.rs");
+
+/// Parses a pattern string into a list of atoms.
+///
+/// On failure the returned [`ParsePatError`] carries the kind of error together with the
+/// byte offset in `pat` of the token at which parsing stopped.
+///
+/// # Remarks
+///
+/// Following are examples of the pattern syntax.
+/// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
+///
+/// ```text
+/// 55 89 e5 83 ? ec
+/// ```
+///
+/// Pairs of case insensitive hexadecimal digits match that exact byte and a question mark is a placeholder for one unknown byte.
+///
+/// A `.` in place of one of the two hex digits leaves that nibble unmatched (for example `4.` or `.f`).
+/// Both nibbles cannot be `.`; use `?` to skip a whole byte instead.
+///
+/// Spaces, tabs, carriage returns and line feeds between tokens are optional and carry no semantic meaning, their purpose is to visually group things together.
+///
+/// ```text
+/// b9 ' 37 13 00 00
+/// ```
+///
+/// Single quotes are used as bookmarks, to save the current cursor rva in the save array passed to the scanner.
+///
+/// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
+/// This power really comes to life with the capability to follow relative and absolute references.
+///
+/// The first entry in the save array is reserved for the rva where the pattern was matched.
+/// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
+///
+/// ```text
+/// b8 [16] 50 [13-42] ff
+/// ```
+///
+/// Pairs of decimal numbers separated by a hyphen in square brackets indicate the lower and upper bound of number of bytes to skip.
+/// The scanner is non greedy and considers the first match while skipping as little as possible.
+///
+/// A single decimal number in square brackets without a hyphen is a fixed size jump, equivalent to writing that number of consecutive question marks.
+///
+/// ```text
+/// 31 c0 74 % ' c3
+/// e8 $ ' 31 c0 c3
+/// 68 * ' 31 c0 c3
+/// ```
+///
+/// These symbols are used to follow; a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
+///
+/// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
+///
+/// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
+///
+/// ```text
+/// b8 * "STRING" 00
+/// ```
+///
+/// String literals appear in double quotes and their bytes are matched exactly as written (UTF-8).
+///
+/// Escape sequences are not supported, switch back to matching with hex digits as needed.
+/// For UTF-16 support, you are welcome to send a PR.
+///
+/// ```text
+/// e8 $ { ' } 83 f0 5c c3
+/// ```
+///
+/// Curly braces must follow a jump symbol (see above).
+///
+/// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
+/// After the pattern successfully matched, the cursor returns to before the jump was followed.
+/// The bytes defining the jump are skipped and matching continues again from here.
+///
+/// ```text
+/// e8 $ @4
+/// ```
+///
+/// Checks that the cursor is aligned at this point in the scan.
+/// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
+/// The argument is a single digit (`0`-`9`) or a letter, where letters continue from 10 regardless of case.
+///
+/// ```text
+/// =1
+/// ```
+///
+/// An `=` followed by a single digit or letter (encoded like the align argument) compares the cursor
+/// with the value in that save slot and fails the match when they differ.
+///
+/// ```text
+/// e8 i1 a0 u4
+/// ```
+///
+/// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
+///
+/// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
+/// This means the values are sign- or zero- extended respectively before being stored.
+/// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
+///
+/// The cursor is advanced by the size of the operand.
+///
+/// A `z` claims the next save slot in the same way, but writes zero to it.
+///
+/// ```text
+/// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
+/// ```
+///
+/// Parentheses indicate alternate subpatterns separated by a pipe character.
+///
+/// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
+pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
+    let mut atoms = Vec::with_capacity(pat.len() / 2);
+    // The helper advances `rest` as it consumes tokens, so on failure the distance
+    // between the two string starts is the offset of the offending token.
+    let mut rest = pat;
+    if let Err(kind) = parse_helper(&mut rest, &mut atoms) {
+        let position = rest.as_ptr() as usize - pat.as_ptr() as usize;
+        return Err(ParsePatError { kind, position });
+    }
+    Ok(atoms)
+}
+// This is preferable but currently limited by macro rules...
+// pub use crate::pattern as parse;
+/// Tokenizes `pat` and appends the resulting atoms to `result`.
+///
+/// `pat` is advanced past every token that was processed successfully. When an error is
+/// returned the update for the failing token is skipped, so `pat` is left pointing at the
+/// start of that token; `parse` uses this to compute the error position.
+///
+/// The output always starts with `Atom::Save(0)`, and trailing atoms that cannot affect
+/// the outcome of a match are trimmed before returning.
+fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
+    result.push(Atom::Save(0));
+    let mut iter = pat.as_bytes().iter();
+    // Next free save slot; slot 0 was just used above.
+    let mut save = 1;
+    // Current nesting level of `{` ... `}`.
+    let mut depth = 0;
+    // Bookkeeping for one open `( ... | ... )` group.
+    #[derive(Default)]
+    struct SubPattern {
+        // Index in `result` of the `Case` atom whose offset still has to be patched.
+        case: usize,
+        // Indices in `result` of the `Break` atoms to patch when the group closes.
+        brks: Vec<usize>,
+        // Value of `save` when the group was opened; every alternative restarts from it.
+        save: u8,
+        // Highest value of `save` reached by any finished alternative.
+        save_next: u8,
+        // Value of `depth` when the group was opened.
+        depth: u8,
+    }
+    let mut subs = Vec::<SubPattern>::new();
+    while let Some(mut chr) = iter.next().cloned() {
+        match chr {
+            // Follow signed 1 byte jump
+            b'%' => result.push(Atom::Jump1),
+            // Follow signed 4 byte jump
+            b'$' => result.push(Atom::Jump4),
+            // Follow pointer
+            b'*' => result.push(Atom::Ptr),
+            // Start recursive operator
+            b'{' => {
+                depth += 1;
+                // Must follow a jump operator and insert push before the jump
+                // The jump atom is swapped for a `Push` sized like the jump operand and
+                // then re-appended right after it.
+                let atom = match result.last_mut() {
+                    Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
+                    Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
+                    Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
+                    _ => return Err(PatError::StackInvalid),
+                };
+                result.push(atom);
+            },
+            // End recursive operator
+            b'}' => {
+                // Unbalanced recursion
+                // NOTE(review): `depth` is unsigned (it is stored into a `u8` field), so
+                // this comparison is equivalent to `depth == 0`.
+                if depth <= 0 {
+                    return Err(PatError::StackError);
+                }
+                depth -= 1;
+                result.push(Atom::Pop);
+            },
+            // Start subpattern
+            b'(' => {
+                subs.push(SubPattern::default());
+                let sub = subs.last_mut().unwrap();
+                // Keep the save and depth state
+                sub.save = save;
+                sub.depth = depth;
+                // Add a new case, update the case offset later
+                sub.case = result.len();
+                result.push(Atom::Case(0));
+            },
+            // Case subpattern
+            b'|' => {
+                // Should already have started a subpattern
+                let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
+                // Update the save state
+                sub.save_next = cmp::max(sub.save_next, save);
+                save = sub.save;
+                depth = sub.depth;
+                // Add a break of the previous subpattern
+                sub.brks.push(result.len());
+                result.push(Atom::Break(0));
+                // Add a new case of the next subpattern
+                // The offset counts the atoms between the pending `Case` and this point
+                // and has to fit in a byte.
+                let case_offset = result.len() - sub.case - 1;
+                if case_offset >= 256 {
+                    return Err(PatError::SubOverflow);
+                }
+                result[sub.case] = Atom::Case(case_offset as u8);
+                sub.case = result.len();
+                result.push(Atom::Case(0));
+            },
+            // End subpattern
+            b')' => {
+                // Should already have started a subpattern
+                let sub = subs.pop().ok_or(PatError::SubPattern)?;
+                // Prepare for the next save
+                save = cmp::max(sub.save_next, save);
+                depth = sub.depth;
+                // Neutralize the last case, since there are no more
+                result[sub.case] = Atom::Nop;
+                // Fill in the breaks
+                for &brk in &sub.brks {
+                    let brk_offset = result.len() - brk - 1;
+                    if brk_offset >= 256 {
+                        return Err(PatError::SubOverflow);
+                    }
+                    result[brk] = Atom::Break(brk_offset as u8);
+                }
+            },
+            // Skip many operator
+            b'[' => {
+                // Parse the lower bound
+                let mut lower_bound = 0u32;
+                let mut at_least_one_char = false;
+                loop {
+                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
+                    match chr {
+                        b'-' | b']' => break,
+                        chr @ b'0'..=b'9' => {
+                            at_least_one_char = true;
+                            lower_bound = lower_bound * 10 + (chr - b'0') as u32;
+                            if lower_bound >= 16384 {
+                                return Err(PatError::ManyOverflow);
+                            }
+                        },
+                        _ => return Err(PatError::ManyInvalid),
+                    }
+                }
+                if !at_least_one_char {
+                    return Err(PatError::ManyInvalid);
+                }
+                // Turn the lower bound into skip ops
+                // The high byte, when present, goes into a preceding `Rangext` atom.
+                if lower_bound > 0 {
+                    if lower_bound >= 256 {
+                        result.push(Atom::Rangext((lower_bound >> 8) as u8));
+                    }
+                    result.push(Atom::Skip((lower_bound & 0xff) as u8));
+                }
+                // Second many part is optional
+                if chr == b']' {
+                    continue;
+                }
+                // Parse the upper bound
+                let mut upper_bound = 0u32;
+                loop {
+                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
+                    match chr {
+                        b']' => break,
+                        chr @ b'0'..=b'9' => {
+                            upper_bound = upper_bound * 10 + (chr - b'0') as u32;
+                            if upper_bound >= 16384 {
+                                return Err(PatError::ManyOverflow);
+                            }
+                        },
+                        _ => return Err(PatError::ManyInvalid),
+                    }
+                }
+                // Lower bound should be strictly less than the upper bound
+                // Only the difference is encoded; the lower bound was already emitted as a skip.
+                if lower_bound < upper_bound {
+                    let many_skip = upper_bound - lower_bound;
+                    if many_skip >= 256 {
+                        result.push(Atom::Rangext((many_skip >> 8) as u8));
+                    }
+                    result.push(Atom::Many((many_skip & 0xff) as u8));
+                }
+                else {
+                    return Err(PatError::ManyRange);
+                }
+            },
+            // Match a byte
+            b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
+                // Bits of the byte that take part in the comparison; a `.` clears a nibble.
+                let mut mask = 0xFF;
+
+                // High nibble of the byte
+                let hi = if chr == b'.' { mask &= 0x0F;0 }
+                else if chr >= b'a' { chr - b'a' + 10 }
+                else if chr >= b'A' { chr - b'A' + 10 }
+                else { chr - b'0' };
+
+                chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
+                // Low nibble of the byte
+                let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
+                else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
+                else if chr >= b'0' && chr <= b'9' { chr - b'0' }
+                else if chr == b'.' { mask &= 0xF0; 0 }
+                else { return Err(PatError::UnpairedHexDigit); };
+
+                // Both nibbles were wildcards
+                if mask == 0 { return Err(PatError::DoubleNibble); };
+
+                // mask out nibble
+                if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
+
+                // Add byte to the pattern
+                result.push(Atom::Byte((hi << 4) + lo));
+            },
+            // Match raw bytes
+            b'"' => {
+                // Every byte up to the closing quote is matched literally.
+                loop {
+                    if let Some(chr) = iter.next().cloned() {
+                        if chr != b'"' {
+                            result.push(Atom::Byte(chr));
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    else {
+                        return Err(PatError::UnclosedQuote);
+                    }
+                }
+            },
+            // Save the cursor
+            b'\'' => {
+                // 'Limited' save space
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                result.push(Atom::Save(save));
+                save += 1;
+            },
+            // Skip bytes
+            b'?' => {
+                // Coalesce consecutive skips into the previous `Skip` atom, unless it holds
+                // the special `PTR_SKIP` value or is already at the maximum.
+                if let Some(Atom::Skip(skip)) = result.last_mut() {
+                    if *skip != PTR_SKIP && *skip < 255u8 {
+                        *skip += 1;
+                        continue;
+                    }
+                }
+                result.push(Atom::Skip(1));
+            },
+
+            // Compare the cursor with a save slot; the operand is one digit or letter
+            b'=' => {
+                let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
+                result.push( match op {
+                    b'0'..=b'9' => Atom::Check(op - b'0'),
+                    b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
+                    b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
+                    _ => return Err(PatError::CheckOperand)
+                });
+            },
+            // Alignment check; the operand is one digit or letter
+            b'@' => {
+                let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
+                result.push( match op {
+                    b'0'..=b'9' => Atom::Aligned(op - b'0'),
+                    b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
+                    b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
+                    _ => return Err(PatError::AlignedOperand)
+                });
+            },
+            // Signed read into the next save slot
+            b'i' => {
+                let atom = match iter.next().cloned() {
+                    Some(b'1') => Atom::ReadI8(save),
+                    Some(b'2') => Atom::ReadI16(save),
+                    Some(b'4') => Atom::ReadI32(save),
+                    _ => return Err(PatError::ReadOperand),
+                };
+                // The atom is only pushed once the slot is known to be available.
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                save += 1;
+                result.push(atom);
+            },
+            // Unsigned read into the next save slot
+            b'u' => {
+                let atom = match iter.next().cloned() {
+                    Some(b'1') => Atom::ReadU8(save),
+                    Some(b'2') => Atom::ReadU16(save),
+                    Some(b'4') => Atom::ReadU32(save),
+                    _ => return Err(PatError::ReadOperand),
+                };
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                save += 1;
+                result.push(atom);
+            },
+            // Zero the next save slot
+            b'z' => {
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                result.push(Atom::Zero(save));
+                save += 1;
+            },
+
+
+            // Allow spaces as padding
+            b' ' | b'\n' | b'\r' | b'\t' => {},
+            // Everything else is illegal
+            _ => {
+                return Err(PatError::UnknownChar);
+            },
+        }
+        // Converted from str originally, should be safe
+        // NOTE(review): a non-ASCII character inside a quoted string is consumed byte by
+        // byte within one iteration, so the cut should land on a char boundary - confirm.
+        *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
+    }
+    // Check balanced stack operators
+    if depth != 0 {
+        return Err(PatError::StackError);
+    }
+    // Check if sub patterns are balanced
+    if subs.len() != 0 {
+        return Err(PatError::SubPattern);
+    }
+
+    // Remove redundant atoms at the end
+    fn is_redundant(atom: &Atom) -> bool {
+        match atom {
+            | Atom::Skip(_)
+            | Atom::Rangext(_)
+            | Atom::Pop
+            | Atom::Many(_) => true,
+            _ => false,
+        }
+    }
+    while result.last().map(is_redundant).unwrap_or(false) {
+        result.pop();
+    }
+
+    Ok(())
+}
--- a/sub/xpat/Cargo.toml
+++ b/sub/xpat/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "sub_xpat"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+sub_core.workspace = true
+sub_macros.workspace = true
--- a/sub/xpat/src/atoms.rs
+++ b/sub/xpat/src/atoms.rs
@@ -0,0 +1,83 @@
+// This file is pulled in with `include!` from more than one place, so it has to stay
+// self-contained (no `use` of crate-local items).
+
+/// A parsed pattern: a borrowed slice of atoms.
+pub type Pattern<'l> = &'l[Atom];
+
+/// Pattern atoms.
+///
+/// The scanner will silently ignore nonsensical arguments.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum Atom {
+    /// Matches a single byte.
+    Byte(u8),
+    /// Captures the cursor in the save array at the specified index.
+    Save(u8),
+    /// After a Pop later continue matching at the current cursor plus the argument.
+    Push(u8),
+    /// Pops the cursor from the stack and continues matching.
+    Pop,
+    /// Sets a mask to apply on next byte match.
+    Fuzzy(u8),
+    /// Skips a fixed number of bytes.
+    Skip(u8),
+    /// Rewinds the cursor a fixed number of bytes.
+    Back(u8),
+    /// Extends the push, skip, back and many range by `argument * 256`.
+    Rangext(u8),
+    /// Looks for the next pattern at most a certain number of bytes ahead.
+    Many(u8),
+    /// Follows a signed 1 byte jump.
+    ///
+    /// Reads the byte under the cursor, sign extends it, adds it plus 1 to the cursor and continues matching.
+    Jump1,
+    /// Follows a signed 4 byte jump.
+    ///
+    /// Reads the dword under the cursor and adds it plus 4 to the cursor and continues matching.
+    Jump4,
+    /// Follows an absolute pointer.
+    ///
+    /// Reads the pointer under the cursor, translates it to an RVA, assigns it to the cursor and continues matching.
+    ///
+    /// Matching fails immediately when translation to an RVA fails.
+    Ptr,
+    /// Follows a position independent reference.
+    ///
+    /// Reads the dword under the cursor and adds it to the saved cursor for the given slot and continues matching.
+    Pir(u8),
+    /// Compares the cursor with the value in the given save slot and fails if they're not equal.
+    Check(u8),
+    /// Checks if the cursor is aligned to `(1 << value)`.
+    Aligned(u8),
+    /// Reads and sign-extends the byte under the cursor, writes to the given slot and advances the cursor by 1.
+    ReadI8(u8),
+    /// Reads and zero-extends the byte under the cursor, writes to the given slot and advances the cursor by 1.
+    ReadU8(u8),
+    /// Reads and sign-extends the word under the cursor, writes to the given slot and advances the cursor by 2.
+    ReadI16(u8),
+    /// Reads and zero-extends the word under the cursor, writes to the given slot and advances the cursor by 2.
+    ReadU16(u8),
+    /// Reads the dword under the cursor, writes to the given slot and advances the cursor by 4.
+    ReadI32(u8),
+    /// Reads the dword under the cursor, writes to the given slot and advances the cursor by 4.
+    ReadU32(u8),
+    /// Writes zero to the given save slot.
+    Zero(u8),
+    /// Sets a retry point when matching fails.
+    ///
+    /// When matching fails the cursor is restored and matching begins again skipping _N_ atoms.
+    Case(u8),
+    /// Continues matching after a case atom, skipping the next _N_ atoms.
+    Break(u8),
+    /// Null instruction, used to make the parser easier to write.
+    Nop,
+}
+
+impl Atom {
+    /// Returns the length a save array needs to hold every slot referenced by `pat`.
+    ///
+    /// That is one more than the highest slot index carried by any slot-addressing atom,
+    /// or `0` when the pattern references no slot at all.
+    pub fn save_len(pat: &[Atom]) -> usize {
+        let mut required = 0usize;
+        for atom in pat {
+            let slot = match *atom {
+                Atom::Save(slot)
+                | Atom::Pir(slot)
+                | Atom::Check(slot)
+                | Atom::Zero(slot)
+                | Atom::ReadI8(slot)
+                | Atom::ReadI16(slot)
+                | Atom::ReadI32(slot)
+                | Atom::ReadU8(slot)
+                | Atom::ReadU16(slot)
+                | Atom::ReadU32(slot) => slot,
+                // Atoms that do not address a save slot do not contribute.
+                _ => continue,
+            };
+            required = required.max(slot as usize + 1);
+        }
+        required
+    }
+}
--- a/sub/xpat/src/lib.rs
+++ b/sub/xpat/src/lib.rs
@@ -0,0 +1,28 @@
+#![no_std] #![allow(unused)]
+
+/// Pattern atom definitions.
+///
+/// The contents live in `atoms.rs` and are textually included rather than declared as
+/// a regular module file.
+pub mod atoms {
+    include!("atoms.rs");
+}
+
+pub mod scannable;
+pub mod scanner;
+
+
+//
+// Export Preludes:
+//
+
+/// Re-exports for typical use: the `pattern!` macro, the `Pattern` alias and the `Scanner`.
+pub mod prelude {
+    pub use sub_macros::pattern;
+    pub use crate::atoms::Pattern;
+    pub use crate::scanner::Scanner;
+}
+
+/// Re-exports of the lower level pieces: the `Atom` type, the `Scannable` trait and
+/// the free functions of the scanner module.
+pub mod public {
+    pub use crate::atoms::Atom;
+    pub use crate::scannable::Scannable;
+    pub use crate::scanner::{
+        exec, scan_for_aob, make_aob
+    };
+}
+
--- a/sub/xpat/src/scannable.rs
+++ b/sub/xpat/src/scannable.rs
@@ -0,0 +1,34 @@
+use core::ops::Range;
+
+/// A source of bytes that can be scanned, addressed by `usize` addresses.
+pub trait Scannable {
+    /// Returns the total address bounds covered by this source.
+    fn range(&self) -> Range<usize>;
+
+    /// Returns the bytes starting at the supplied address, if there is a chunk there.
+    fn chunk_at(&self, address: usize) -> Option<&[u8]>;
+
+    /// Given an address, returns the next chunk as `(usize, bytes)`, or `None` if there are no more chunks.
+    fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>;
+
+}
+
+
+/// A plain byte slice, addressed from zero.
+impl Scannable for [u8] {
+    /// Covers `0..len`.
+    fn range(&self) -> Range<usize> {
+        Range { start: 0, end: self.len() }
+    }
+
+    /// Returns the tail of the slice starting at `address`; an address equal to the
+    /// length yields an empty slice and anything beyond it yields `None`.
+    fn chunk_at(&self, address: usize) -> Option<&[u8]> {
+        if address > self.len() {
+            return None;
+        }
+        Some(&self[address..])
+    }
+
+    /// A slice is a single chunk, so there is never a next one.
+    fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> {
+        None
+    }
+}
+
+/// A byte slice paired with the address of its first byte, for scanning with a specific base address.
+impl Scannable for (usize, &[u8]) {
+    /// Covers `base..base + len`.
+    fn range(&self) -> Range<usize> {
+        let (base, bytes) = *self;
+        base..(base + bytes.len())
+    }
+
+    /// Translates `address` into an offset from the base and returns the tail of the
+    /// slice from there; addresses below the base or past the end yield `None`.
+    fn chunk_at(&self, address: usize) -> Option<&[u8]> {
+        let (base, bytes) = *self;
+        let offset = address.checked_sub(base)?;
+        bytes.get(offset..)
+    }
+
+    /// The pair describes a single chunk, so there is never a next one.
+    fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> {
+        None
+    }
+}
--- a/sub/xpat/src/scanner.rs
+++ b/sub/xpat/src/scanner.rs
@@ -0,0 +1,418 @@
+use core::ops::{Range, RangeBounds, Bound};
+use crate::atoms::{Pattern, Atom};
+use crate::scannable::Scannable;
+use sub_core::{pod::Pod};
+
+/// Byte count substituted when a `Push`, `Skip` or `Back` atom's effective
+/// count is zero: the size of a `usize`.
+const SKIP_VA: u32 = size_of::<usize>() as u32;
+
+/// Finds successive matches of a pattern inside a [`Scannable`] binary.
+///
+/// The scanner keeps a cursor between calls to `next`, so each call continues
+/// after the previously examined candidate address.
+pub struct Scanner<'a, S: Scannable + ?Sized> {
+    /// the binary to be scanned
+    bin: &'a S,
+
+    /// the pattern
+    pat: Pattern<'a>,
+
+    /// the range to search for the pattern in
+    /// (already clamped to the bounds of `bin` by `new`)
+    range: Range<usize>,
+
+    /// the current cursor position
+    /// (the next candidate address to examine)
+    cursor: usize,
+}
+
+impl<'a, S: Scannable + ?Sized> Scanner<'a, S> {
+
+    /// Creates a scanner for `pat` over `bin`, restricted to the part of `r`
+    /// that lies inside the bounds reported by `bin`. Scanning starts at the
+    /// beginning of that clamped range.
+    pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds<usize>) -> Self {
+        let range = limit_range(bin, r);
+        Self { bin, pat, cursor: range.start, range }
+    }
+
+    /// Advances to the next match.
+    ///
+    /// Returns `true` when a match was found, in which case `saves` holds
+    /// whatever the pattern recorded; returns `false` when the range is
+    /// exhausted.
+    pub fn next(&mut self, saves: &mut [usize]) -> bool {
+        // Literal bytes at the start of the pattern (if any) let us search for
+        // candidates instead of interpreting the pattern at every address.
+        let mut storage = <[u8; 0x10] as Pod>::uninit();
+        let prefix = make_aob(self.pat, &mut storage);
+
+        if prefix.is_empty() {
+            // No literal prefix: run the pattern at every address in range.
+            while self.range.contains(&self.cursor) {
+                let candidate = self.cursor;
+                self.cursor += 1;
+                if exec(self.bin, candidate, self.pat, saves, self.range.clone()) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        let end = self.range.end;
+        while let Some(hit) = scan_for_aob(self.bin, self.cursor..end, prefix) {
+            // resume just past this candidate on the next iteration / call
+            self.cursor = hit + 1;
+            if exec(self.bin, hit, self.pat, saves, self.range.clone()) {
+                return true;
+            }
+        }
+        false
+    }
+}
+
+/// Interprets `pattern` against `bin`, starting with the cursor at `address`.
+///
+/// Atoms are executed in order. `Byte` atoms compare against the binary and
+/// advance the cursor; the other atoms move the cursor, record values into
+/// `saves`, or start nested runs over the remaining atoms.
+///
+/// * `saves` - slots written by `Save`, `Read*` and `Zero`, and read by `Pir`
+///   and `Check`. Slot indices beyond `saves.len()` are ignored.
+/// * `range` - not consulted here; it is only forwarded to nested `exec` and
+///   `exec_many` calls.
+///
+/// Returns `false` as soon as a comparison fails or a required read falls
+/// outside the binary. Returns `true` when the pattern is exhausted or a
+/// `Pop` / `Break` atom is reached.
+#[inline(always)]
+pub fn exec<Binary: Scannable + ?Sized>(
+    bin: &Binary,
+    address: usize,
+    pattern: Pattern,
+    saves: &mut [usize],
+    range: Range<usize>,
+) -> bool {
+
+    let mut cursor = address;
+    let mut pc = 0;
+
+    // pattern state
+    // `mask` applies to the next `Byte` comparison only (set by `Fuzzy`);
+    // `ext_range` is added to the next count-carrying atom (set by `Rangext`).
+    let mut mask = 0xff;
+    let mut ext_range = 0u32;
+
+    /// Reads a `T` from `bin` at `address`, or `None` if fewer than
+    /// `size_of::<T>()` bytes are available there.
+    #[inline(always)]
+    fn read<B: Scannable + ?Sized, T: Sized + Copy>(bin: &B, address: usize) -> Option<T> {
+        let slice = bin.chunk_at(address)?;
+        if slice.len() >= size_of::<T>() {
+            // SAFETY: `slice` holds at least `size_of::<T>()` readable bytes and
+            // `read_unaligned` has no alignment requirement. Callers in this
+            // function only instantiate `T` with integer types, for which every
+            // bit pattern is valid.
+            return Some(unsafe { (slice.as_ptr() as *const T).read_unaligned() });
+        }
+        None
+    }
+
+    while let Some(atom) = pattern.get(pc).cloned() {
+        pc += 1;
+        match atom {
+
+            // Compare bytes
+            Atom::Byte(pat_byte) => {
+                let Some(byte) = read::<_, u8>(bin, cursor) else { return false; };
+                if byte & mask != pat_byte & mask { return false; }
+                cursor += 1;
+                mask = 0xFF;
+            }
+
+            // save the current address
+            Atom::Save(slot_idx) => {
+                if let Some(slot) = saves.get_mut(slot_idx as usize) {
+                    *slot = cursor;
+                }
+            }
+
+            Atom::Push(skip) => {
+                let skip = ext_range + skip as u32;
+                let skip = if skip == 0 { SKIP_VA } else { skip };
+
+                // start running the pattern from pc...
+                // (the nested run returns at the matching `Pop`)
+                if !exec(bin, cursor, &pattern[pc..], saves, range.clone()) {
+                    return false;
+                }
+                // the nested run's cursor is discarded; continue `skip` bytes
+                // past the position where the push happened
+                cursor = cursor.wrapping_add(skip as usize);
+                mask = 0xff;
+                ext_range = 0;
+
+                // Iterate forward in the pattern looking for the POP for this push...
+                let mut counter = 1;
+                while counter != 0 {
+                    // keep incrementing the pc so the next atom will be the one after pop
+                    match pattern.get(pc) {
+                        Some(Atom::Push(_)) => counter += 1,
+                        Some(Atom::Pop) => counter -= 1,
+                        None => return true,
+                        _ => (/**/)
+                    }
+                    pc += 1;
+                }
+            }
+
+            // end of a nested run started by `Push`
+            Atom::Pop => {
+                return true;
+            }
+
+            // mask for the next byte comparison
+            Atom::Fuzzy(pat_mask) => {
+                mask = pat_mask;
+            }
+
+            // move the cursor forward
+            Atom::Skip(skip) => {
+                let skip = ext_range + skip as u32;
+                let skip = if skip == 0 { SKIP_VA } else { skip };
+                cursor = cursor.wrapping_add(skip as usize);
+                ext_range = 0;
+            }
+
+            // move the cursor backward
+            Atom::Back(back) => {
+                let rewind = ext_range + back as u32;
+                let rewind = if rewind == 0 { SKIP_VA } else { rewind };
+                cursor = cursor.wrapping_sub(rewind as usize);
+                ext_range = 0;
+            }
+
+            // high part of the count used by the next count-carrying atom
+            Atom::Rangext(ext) => {
+                ext_range = ext as u32 * 256;
+            }
+
+            // the rest of the pattern may start anywhere in the next `limit` bytes
+            Atom::Many(limit) => {
+                let limit = ext_range + limit as u32;
+                return exec_many(bin, cursor, &pattern[pc..], saves, range, limit);
+            }
+
+            // follow a signed 1-byte displacement relative to the byte after it
+            Atom::Jump1 => {
+                let Some(sbyte) = read::<_, i8>(bin, cursor) else { return false };
+                cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1);
+            }
+
+            // follow a signed 4-byte displacement relative to the byte after it
+            Atom::Jump4 => {
+                let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
+                cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4);
+            }
+
+            // follow a pointer-sized absolute address
+            Atom::Ptr => {
+                let Some(sptr) = read::<_, usize>(bin, cursor) else { return false };
+                cursor = sptr;
+            }
+
+            // follow a signed 4-byte displacement relative to a saved address
+            // (or to the cursor itself when the slot does not exist)
+            Atom::Pir(slot) => {
+                let Some(sdword) = read::<_, i32>(bin, cursor) else { return false };
+                let base = saves.get(slot as usize).cloned().unwrap_or(cursor);
+                cursor = base.wrapping_add(sdword as usize);
+            }
+
+            // the cursor must equal a previously saved value
+            // (a missing slot passes the check)
+            Atom::Check(slot) => {
+                if let Some(&rva) = saves.get(slot as usize) {
+                    if rva != cursor { return false; }
+                }
+            }
+
+            // the low `align` bits of the cursor must be zero
+            Atom::Aligned(align) => {
+                if cursor & ((1 << align) - 1) != 0 {
+                    return false;
+                }
+            }
+
+            // Read a value at the cursor into a save slot; the cursor does not
+            // move. Signed values are sign-extended by the `as` cast.
+            Atom::ReadU8(slot) => {
+                let Some(value) = read::<_, u8>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            Atom::ReadI8(slot) => {
+                let Some(value) = read::<_, i8>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            Atom::ReadU16(slot) => {
+                let Some(value) = read::<_, u16>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            Atom::ReadI16(slot) => {
+                let Some(value) = read::<_, i16>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            Atom::ReadU32(slot) => {
+                let Some(value) = read::<_, u32>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            Atom::ReadI32(slot) => {
+                let Some(value) = read::<_, i32>(bin, cursor) else { return false };
+                if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ }
+            }
+            // clear a save slot
+            Atom::Zero(slot) => {
+                if let Some(slot) = saves.get_mut(slot as usize) {
+                    *slot = 0;
+                }
+            }
+            Atom::Case(next) => {
+                // Run only the atoms after this `Case`, exactly as `Push` does.
+                // Passing the whole pattern here would re-run it from the top
+                // and re-enter this same `Case`.
+                if exec(bin, cursor, &pattern[pc..], saves, range.clone()) {
+                    // same as Push/Pop except we add the next from the break to the pc.
+                    // NOTE(review): this loop has no exit other than running off
+                    // the end of the pattern, so after a successful case the
+                    // atoms following the alternation are never executed and the
+                    // result is `true`. The cursor reached by the nested run is
+                    // also discarded. Confirm whether that is intended.
+                    let mut counter = 1;
+                    loop {
+                        pc += 1;
+                        match pattern.get(pc) {
+                            Some(Atom::Case(_)) => counter += 1,
+                            Some(Atom::Break(next)) => {
+                                counter -= 1;
+                                if counter == 0 {
+                                    pc += *next as usize
+                                }
+                            }
+                            None => return true,
+                            _ => (/**/)
+                        }
+                    }
+                } else {
+                    // if the case fails go to the location defined by next
+                    pc += next as usize;
+                }
+            }
+            // end of a case body inside a nested run
+            Atom::Break(_next) => {
+                return true;
+            }
+            Atom::Nop => {}
+        }
+    }
+    true
+}
+
+
+/// Tries to match `pattern` at any of the `limit` addresses starting at
+/// `address`, returning `true` on the first address where it matches.
+///
+/// Returns `false` immediately when `address` itself is not covered by `bin`.
+#[inline(always)]
+pub fn exec_many<Binary: Scannable + ?Sized >(
+    bin: &Binary,
+    address: usize,
+    pattern: Pattern,
+    saves: &mut [usize],
+    range: Range<usize>,
+    limit: u32,
+) -> bool {
+    let mut storage = <[u8; 0x10] as Pod>::uninit();
+    let prefix = make_aob(pattern, &mut storage);
+
+    // Out of bounds: the pattern fails before we even try.
+    let Some(chunk) = bin.chunk_at(address) else {
+        return false;
+    };
+
+    if prefix.is_empty() {
+        // No literal prefix to search for: run the pattern at each offset,
+        // never looking further than the bytes available in this chunk.
+        let attempts = (limit as usize).min(chunk.len());
+        return (0..attempts)
+            .any(|offset| exec(bin, address + offset, pattern, saves, range.clone()));
+    }
+
+    // Literal prefix available: only run the pattern where the prefix occurs.
+    let end = address + limit as usize;
+    let mut from = address;
+    while let Some(hit) = scan_for_aob(bin, from..end, prefix) {
+        if exec(bin, hit, pattern, saves, range.clone()) {
+            return true;
+        }
+        from = hit + 1;
+    }
+    false
+}
+
+
+/// Searches `bin` for the first occurrence of the byte string `aob` whose
+/// start address lies inside `range`.
+///
+/// The search walks the binary chunk by chunk. An occurrence is found when it
+/// lies entirely inside one chunk, or when it straddles the border between two
+/// contiguous chunks. Occurrences spanning more than two chunks are not
+/// detected.
+///
+/// Returns the address of the first byte of the occurrence, or `None` when
+/// there is none. An empty `aob` always yields `None`.
+#[inline(always)]
+pub fn scan_for_aob<Binary: Scannable + ?Sized>(
+    bin: &Binary,
+    range: Range<usize>,
+    aob: &[u8],
+) -> Option<usize> {
+    // `windows(0)` panics, and an empty needle has no meaningful position.
+    if aob.is_empty() {
+        return None;
+    }
+
+    let mut address = range.start;
+    let upper_bounds = range.end;
+
+    while address < upper_bounds {
+
+        // get the current chunk for the given address
+        let chunk = match bin.chunk_at(address) {
+            Some(chunk) => chunk,
+
+            // the address is out of bounds, try to shift the address so it is back in bounds
+            None => match bin.next_chunk(address) {
+
+                // the next chunk is in bounds so we will just correct the address and use that chunk instead
+                Some((naddr, nchunk)) if naddr < upper_bounds => {
+                    address = naddr;
+                    nchunk
+                }
+
+                // no hope, give up
+                _ => return None,
+            }
+        };
+
+        // try to find the aob in the current chunk; `take` keeps the start of
+        // every candidate below `upper_bounds`
+        if let Some(offset) = chunk.windows(aob.len())
+            .take(upper_bounds - address).position(|c| c == aob) {
+            // we got a hit, return it
+            return Some(address + offset)
+        }
+
+        // the AOB was not found in the current chunk, move on to the next one
+        let Some((naddr, nchunk)) = bin.next_chunk(address) else {
+            return None
+        };
+
+        // a "next" chunk that does not lie ahead would make the scan loop forever
+        if naddr <= address {
+            return None
+        }
+
+        // if the chunks are contiguous, check if the aob sits on the border.
+        // Every split point 1..len is tried, longest head first, so that the
+        // lowest matching address is the one reported.
+        if address + chunk.len() == naddr {
+            for i in (1..aob.len()).rev() {
+                let (p1, p2) = aob.split_at(i);
+                if chunk.ends_with(p1) && nchunk.starts_with(p2) {
+                    // aob was found between two chunks; it starts `i` bytes
+                    // before the border
+                    let hit = naddr - i;
+                    if hit < upper_bounds {
+                        return Some(hit)
+                    }
+                }
+            }
+        }
+
+        // Both the inside of the current chunk and its border have been
+        // checked, so continue at the start of the next chunk.
+        address = naddr;
+    }
+    None
+
+}
+
+
+/// Clamps the requested `range` to the bounds reported by `bin`.
+///
+/// Any form of range bound is accepted and converted to a half-open
+/// `start..end`. The result never starts before, nor ends after, the
+/// binary's own range.
+fn limit_range<Binary: Scannable + ?Sized>(
+    bin: &Binary,
+    range: impl RangeBounds<usize>,
+) -> Range<usize> {
+    let Range { start: lowest, end: highest } = bin.range();
+
+    // first address to include
+    let requested_start = match range.start_bound() {
+        Bound::Unbounded => lowest,
+        Bound::Included(&first) => first,
+        Bound::Excluded(&before) => before.saturating_add(1),
+    };
+
+    // first address to exclude
+    let requested_end = match range.end_bound() {
+        Bound::Unbounded => highest,
+        Bound::Included(&last) => last.saturating_add(1),
+        Bound::Excluded(&after) => after,
+    };
+
+    Range {
+        start: requested_start.max(lowest),
+        end: requested_end.min(highest),
+    }
+}
+
+/// Builds an array of bytes from the start of the pattern.
+///
+/// Leading `Byte` atoms are copied into `buffer` in order; `Zero` and `Save`
+/// atoms in between are skipped. Collection stops at the first atom of any
+/// other kind, or when `buffer` is full.
+///
+/// Returns the filled prefix of `buffer`, which is empty when the pattern does
+/// not start with literal bytes or when `buffer` has no room.
+pub fn make_aob<'b>(pattern: &[Atom], buffer: &'b mut [u8]) -> &'b [u8] {
+    let mut len = 0;
+    for atom in pattern {
+        match atom {
+            // these do not consume input, so they do not interrupt the prefix
+            Atom::Zero(_) | Atom::Save(_) => {}
+            Atom::Byte(b) => {
+                // check for room before writing so that an empty (or full)
+                // buffer ends the prefix instead of panicking
+                let Some(slot) = buffer.get_mut(len) else { break };
+                *slot = *b;
+                len += 1;
+            }
+            _ => break,
+        }
+    }
+    &buffer[..len]
+}
+
+
+
+
+
+
+
+
+
+
+
+
--- a/x/Cargo.toml
+++ b/x/Cargo.toml
@@ -10,7 +10,7 @@ macros = ["sub_macros"]
 libm = ["sub_libm"]
 pe = ["sub_pe"]

-
+xpat = ["core", "sub_xpat", "macros"]
 winuser = ["sub_winu", "pe", "sub_pe/windows"]

 [dependencies]
@@ -18,4 +18,5 @@ sub_core = { workspace = true, optional = true }
 sub_libm = { workspace = true, optional = true}
 sub_pe = { workspace = true, optional = true }
 sub_winu = { workspace = true, optional = true }
-sub_macros = { workspace = true, optional = true }
+sub_macros = { workspace = true, optional = true }
+sub_xpat = { workspace = true, optional = true }
--- a/x/src/lib.rs
+++ b/x/src/lib.rs
@@ -18,6 +18,7 @@ import!(sub_core, core, "core");
 import!(sub_libm, libm, "libm");
 import!(sub_pe, pe, "pe");
 import!(sub_winu, win, "winuser");
+import!(sub_xpat, xpat, "xpat");

 /// the macro crate is a proc macro, so it is a bit different.
 #[cfg(feature = "macros")]
--- a/x/tests/test_xpat.rs
+++ b/x/tests/test_xpat.rs
@@ -0,0 +1,17 @@
+
+
+
+/// Scans a small buffer for `E8 [0-4] BB` and checks that two successive
+/// matches are reported, at indices 1 and 3.
+#[test]
+pub fn test_pattern() {
+
+    // presumably `[0-4]` allows a gap of up to four bytes between `E8` and
+    // `BB` -- confirm against the pattern parser
+    let pattern = x::pattern!("E8 [0-4] BB ");
+    // `E8` occurs at index 1 (directly followed by `BB`) and at index 3
+    // (followed by `BB` after one other byte)
+    let buffer: &[u8] = &[ 0xAA, 0xE8, 0xBB, 0xE8, 0x00, 0xBB, ];
+
+    let mut scanner = x::Scanner::new(buffer, pattern, ..);
+    let mut saves = [0usize;8];
+
+    // NOTE(review): the assertions rely on slot 0 receiving the match start;
+    // the pattern text has no explicit save, so this is presumably added by
+    // the `pattern!` macro -- confirm
+    assert!(scanner.next(&mut saves));
+    assert_eq!(saves[0], 1);
+    assert!(scanner.next(&mut saves));
+    assert_eq!(saves[0], 3);
+}