From 8b8b506319faed0a815d60bdcde18f501120add5 Mon Sep 17 00:00:00 2001
From: Intege-rs <me@jessiep.me>
Date: Fri, 28 Nov 2025 02:12:33 -0500
Subject: [PATCH] expose parser.rs via a hack

---
 sub/_macros/src/lib.rs      |   3 +
 sub/_macros/src/patterns.rs | 487 +-----------------------------------
 sub/xpat/Cargo.toml         |   3 +
 sub/xpat/src/hexdump.rs     |   6 +-
 sub/xpat/src/lib.rs         |   6 +
 sub/xpat/src/parser.rs      | 477 +++++++++++++++++++++++++++++++++++
 6 files changed, 503 insertions(+), 479 deletions(-)
 create mode 100644 sub/xpat/src/parser.rs

diff --git a/sub/_macros/src/lib.rs b/sub/_macros/src/lib.rs
index d712b46..253dbe2 100644
--- a/sub/_macros/src/lib.rs
+++ b/sub/_macros/src/lib.rs
@@ -1,5 +1,8 @@
 #![allow(unused)]
 
+// xpat's parser requires this
+extern crate alloc;
+
 mod from_repr;
 mod patterns;
 
diff --git a/sub/_macros/src/patterns.rs b/sub/_macros/src/patterns.rs
index 4ede814..59bbbad 100644
--- a/sub/_macros/src/patterns.rs
+++ b/sub/_macros/src/patterns.rs
@@ -1,6 +1,15 @@
 use core::{cmp, fmt, mem, str};
 use proc_macro::{Literal, TokenStream, TokenTree};
 
+
+mod atoms { // pulls in xpat's atoms.rs by textual inclusion
+    include!("../../xpat/src/atoms.rs");
+}
+
+mod parser { // parser.rs does `use super::atoms::Atom`, so it relies on the sibling `atoms` module above
+    include!("../../xpat/src/parser.rs");
+}
+
 /// Compile time pattern parser.
 ///
 /// ```ignore
@@ -14,7 +23,7 @@ pub fn proc_pattern(input: TokenStream) -> TokenStream {
         _e => panic!("expected a single string literal to parse, got: {_e:?}"),
     };
 
-    let pattern = match parse(&string) {
+    let pattern = match parser::parse(&string) {
         Ok(pattern) => pattern,
         Err(err) => panic!("invalid pattern syntax: {}", err),
     };
@@ -52,479 +61,3 @@ fn parse_str_literal(input: &Literal) -> String {
     }
     string
 }
-
-
-/// Special skip value to indicate to use platform pointer size instead.
-pub(crate) const PTR_SKIP: u8 = 0;
-
-/// Pattern parsing error.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub struct ParsePatError {
-    kind: PatError,
-    position: usize,
-}
-impl fmt::Display for ParsePatError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str())
-    }
-}
-
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-enum PatError {
-    UnpairedHexDigit,
-    UnknownChar,
-    ManyOverflow,
-    ManyRange,
-    ManyInvalid,
-    SaveOverflow,
-    StackError,
-    StackInvalid,
-    UnclosedQuote,
-    AlignedOperand,
-    CheckOperand,
-    ReadOperand,
-    SubPattern,
-    SubOverflow,
-    DoubleNibble
-}
-impl PatError {
-    fn to_str(self) -> &'static str {
-        match self {
-            PatError::UnpairedHexDigit => "unpaired hex digit",
-            PatError::UnknownChar => "unknown character",
-            PatError::ManyOverflow => "many range exceeded",
-            PatError::ManyRange => "many bounds nonsensical",
-            PatError::ManyInvalid => "many invalid syntax",
-            PatError::SaveOverflow => "save store overflow",
-            PatError::StackError => "stack unbalanced",
-            PatError::StackInvalid => "stack must follow jump",
-            PatError::UnclosedQuote => "string missing end quote",
-            PatError::AlignedOperand => "aligned operand error",
-            PatError::CheckOperand => "aligned operand error",
-            PatError::ReadOperand => "read operand error",
-            PatError::SubPattern => "sub pattern error",
-            PatError::SubOverflow => "sub pattern too large",
-            PatError::DoubleNibble => "unpaired nibble wildcard",
-        }
-    }
-}
-
-//----------------------------------------------------------------
-
-include!("../../xpat/src/atoms.rs");
-
-/// Pattern parser.
-///
-/// # Remarks
-///
-/// Following are examples of the pattern syntax.
-/// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
-///
-/// ```text
-/// 55 89 e5 83 ? ec
-/// ```
-///
-/// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
-///
-/// Note that a single question mark matches a whole byte. The syntax to mask part of a byte is not yet available.
-///
-/// Spaces (code point 32) are completely optional and carry no semantic meaning, their purpose is to visually group things together.
-///
-/// ```text
-/// b9 ' 37 13 00 00
-/// ```
-///
-/// Single quotes are used as a bookmarks, to save the current cursor rva in the save array passed to the scanner.
-///
-/// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
-/// This power really comes to life with the capability to follow relative and absolute references.
-///
-/// The first entry in the save array is reserved for the rva where the pattern was matched.
-/// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
-///
-/// ```text
-/// b8 [16] 50 [13-42] ff
-/// ```
-///
-/// Pairs of decimal numbers separated by a hypen in square brackets indicate the lower and upper bound of number of bytes to skip.
-/// The scanner is non greedy and considers the first match while skipping as little as possible.
-///
-/// A single decimal number in square brackets without hypens is a fixed size jump, equivalent to writing that number of consecutive question marks.
-///
-/// ```text
-/// 31 c0 74 % ' c3
-/// e8 $ ' 31 c0 c3
-/// 68 * ' 31 c0 c3
-/// ```
-///
-/// These symbols are used to follow; a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
-///
-/// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
-///
-/// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
-///
-/// ```text
-/// b8 * "STRING" 00
-/// ```
-///
-/// String literals appear in double quotes and will be matched as UTF-8.
-///
-/// Escape sequences are not supported, switch back to matching with hex digits as needed.
-/// For UTF-16 support, you are welcome to send a PR.
-///
-/// ```text
-/// e8 $ { ' } 83 f0 5c c3
-/// ```
-///
-/// Curly braces must follow a jump symbol (see above).
-///
-/// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
-/// After the pattern successfully matched, the cursor returns to before the jump was followed.
-/// The bytes defining the jump are skipped and matching continues again from here.
-///
-/// ```text
-/// e8 $ @4
-/// ```
-///
-/// Checks that the cursor is aligned at this point in the scan.
-/// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
-///
-/// ```text
-/// e8 i1 a0 u4
-/// ```
-///
-/// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
-///
-/// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
-/// This means the values are sign- or zero- extended respectively before being stored.
-/// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
-///
-/// The cursor is advanced by the size of the operand.
-///
-/// ```text
-/// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
-/// ```
-///
-/// Parentheses indicate alternate subpatterns separated by a pipe character.
-///
-/// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
-pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
-    let mut result = Vec::with_capacity(pat.len() / 2);
-    let mut pat_end = pat;
-    match parse_helper(&mut pat_end, &mut result) {
-        Ok(()) => Ok(result),
-        Err(kind) => {
-            let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize;
-            Err(ParsePatError { kind, position })
-        },
-    }
-}
-// This is preferable but currently limited by macro rules...
-// pub use crate::pattern as parse;
-fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
-    result.push(Atom::Save(0));
-    let mut iter = pat.as_bytes().iter();
-    let mut save = 1;
-    let mut depth = 0;
-    #[derive(Default)]
-    struct SubPattern {
-        case: usize,
-        brks: Vec<usize>,
-        save: u8,
-        save_next: u8,
-        depth: u8,
-    }
-    let mut subs = Vec::<SubPattern>::new();
-    while let Some(mut chr) = iter.next().cloned() {
-        match chr {
-            // Follow signed 1 byte jump
-            b'%' => result.push(Atom::Jump1),
-            // Follow signed 4 byte jump
-            b'$' => result.push(Atom::Jump4),
-            // Follow pointer
-            b'*' => result.push(Atom::Ptr),
-            // Start recursive operator
-            b'{' => {
-                depth += 1;
-                // Must follow a jump operator and insert push before the jump
-                let atom = match result.last_mut() {
-                    Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
-                    Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
-                    Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
-                    _ => return Err(PatError::StackInvalid),
-                };
-                result.push(atom);
-            },
-            // End recursive operator
-            b'}' => {
-                // Unbalanced recursion
-                if depth <= 0 {
-                    return Err(PatError::StackError);
-                }
-                depth -= 1;
-                result.push(Atom::Pop);
-            },
-            // Start subpattern
-            b'(' => {
-                subs.push(SubPattern::default());
-                let sub = subs.last_mut().unwrap();
-                // Keep the save and depth state
-                sub.save = save;
-                sub.depth = depth;
-                // Add a new case, update the case offset later
-                sub.case = result.len();
-                result.push(Atom::Case(0));
-            },
-            // Case subpattern
-            b'|' => {
-                // Should already have started a subpattern
-                let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
-                // Update the save state
-                sub.save_next = cmp::max(sub.save_next, save);
-                save = sub.save;
-                depth = sub.depth;
-                // Add a break of the previous subpattern
-                sub.brks.push(result.len());
-                result.push(Atom::Break(0));
-                // Add a new case of the next subpattern
-                let case_offset = result.len() - sub.case - 1;
-                if case_offset >= 256 {
-                    return Err(PatError::SubOverflow);
-                }
-                result[sub.case] = Atom::Case(case_offset as u8);
-                sub.case = result.len();
-                result.push(Atom::Case(0));
-            },
-            // End subpattern
-            b')' => {
-                // Should already have started a subpattern
-                let sub = subs.pop().ok_or(PatError::SubPattern)?;
-                // Prepare for the next save
-                save = cmp::max(sub.save_next, save);
-                depth = sub.depth;
-                // Neutralize the last case, since there are no more
-                result[sub.case] = Atom::Nop;
-                // Fill in the breaks
-                for &brk in &sub.brks {
-                    let brk_offset = result.len() - brk - 1;
-                    if brk_offset >= 256 {
-                        return Err(PatError::SubOverflow);
-                    }
-                    result[brk] = Atom::Break(brk_offset as u8);
-                }
-            },
-            // Skip many operator
-            b'[' => {
-                // Parse the lower bound
-                let mut lower_bound = 0u32;
-                let mut at_least_one_char = false;
-                loop {
-                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
-                    match chr {
-                        b'-' | b']' => break,
-                        chr @ b'0'..=b'9' => {
-                            at_least_one_char = true;
-                            lower_bound = lower_bound * 10 + (chr - b'0') as u32;
-                            if lower_bound >= 16384 {
-                                return Err(PatError::ManyOverflow);
-                            }
-                        },
-                        _ => return Err(PatError::ManyInvalid),
-                    }
-                }
-                if !at_least_one_char {
-                    return Err(PatError::ManyInvalid);
-                }
-                // Turn the lower bound into skip ops
-                if lower_bound > 0 {
-                    if lower_bound >= 256 {
-                        result.push(Atom::Rangext((lower_bound >> 8) as u8));
-                    }
-                    result.push(Atom::Skip((lower_bound & 0xff) as u8));
-                }
-                // Second many part is optional
-                if chr == b']' {
-                    continue;
-                }
-                // Parse the upper bound
-                let mut upper_bound = 0u32;
-                loop {
-                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
-                    match chr {
-                        b']' => break,
-                        chr @ b'0'..=b'9' => {
-                            upper_bound = upper_bound * 10 + (chr - b'0') as u32;
-                            if upper_bound >= 16384 {
-                                return Err(PatError::ManyOverflow);
-                            }
-                        },
-                        _ => return Err(PatError::ManyInvalid),
-                    }
-                }
-                // Lower bound should be strictly less than the upper bound
-                if lower_bound < upper_bound {
-                    let many_skip = upper_bound - lower_bound;
-                    if many_skip >= 256 {
-                        result.push(Atom::Rangext((many_skip >> 8) as u8));
-                    }
-                    result.push(Atom::Many((many_skip & 0xff) as u8));
-                }
-                else {
-                    return Err(PatError::ManyRange);
-                }
-            },
-            // Match a byte
-            b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
-                let mut mask = 0xFF;
-
-                // High nibble of the byte
-                let hi = if chr == b'.' { mask &= 0x0F;0 }
-                else if chr >= b'a' { chr - b'a' + 10 }
-                else if chr >= b'A' { chr - b'A' + 10 }
-                else { chr - b'0' };
-
-                chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
-                // Low nibble of the byte
-                let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
-                else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
-                else if chr >= b'0' && chr <= b'9' { chr - b'0' }
-                else if chr == b'.' { mask &= 0xF0; 0 }
-                else { return Err(PatError::UnpairedHexDigit); };
-
-                if mask == 0 { return Err(PatError::DoubleNibble); };
-
-                // mask out nibble
-                if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
-
-                // Add byte to the pattern
-                result.push(Atom::Byte((hi << 4) + lo));
-            },
-            // Match raw bytes
-            b'"' => {
-                loop {
-                    if let Some(chr) = iter.next().cloned() {
-                        if chr != b'"' {
-                            result.push(Atom::Byte(chr));
-                        }
-                        else {
-                            break;
-                        }
-                    }
-                    else {
-                        return Err(PatError::UnclosedQuote);
-                    }
-                }
-            },
-            // Save the cursor
-            b'\'' => {
-                // 'Limited' save space
-                if save >= u8::MAX {
-                    return Err(PatError::SaveOverflow);
-                }
-                result.push(Atom::Save(save));
-                save += 1;
-            },
-            // Skip bytes
-            b'?' => {
-                // match result.last_mut() {
-                // 	Some(Atom::Skip(skip)) if *skip != PTR_SKIP && *skip < 127i8 => *skip += 1,
-                // 	_ => result.push(Atom::Skip(1)),
-                // };
-                // Coalescence skips together
-                if let Some(Atom::Skip(skip)) = result.last_mut() {
-                    if *skip != PTR_SKIP && *skip < 255u8 {
-                        *skip += 1;
-                        continue;
-                    }
-                }
-                result.push(Atom::Skip(1));
-            },
-
-            b'=' => {
-                let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
-                result.push( match op {
-                    b'0'..=b'9' => Atom::Check(op - b'0'),
-                    b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
-                    b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
-                    _ => return Err(PatError::CheckOperand)
-                });
-            },
-            b'@' => {
-                let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
-                result.push( match op {
-                    b'0'..=b'9' => Atom::Aligned(op - b'0'),
-                    b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
-                    b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
-                    _ => return Err(PatError::AlignedOperand)
-                });
-            },
-            b'i' => {
-                let atom = match iter.next().cloned() {
-                    Some(b'1') => Atom::ReadI8(save),
-                    Some(b'2') => Atom::ReadI16(save),
-                    Some(b'4') => Atom::ReadI32(save),
-                    _ => return Err(PatError::ReadOperand),
-                };
-                if save >= u8::MAX {
-                    return Err(PatError::SaveOverflow);
-                }
-                save += 1;
-                result.push(atom);
-            },
-            b'u' => {
-                let atom = match iter.next().cloned() {
-                    Some(b'1') => Atom::ReadU8(save),
-                    Some(b'2') => Atom::ReadU16(save),
-                    Some(b'4') => Atom::ReadU32(save),
-                    _ => return Err(PatError::ReadOperand),
-                };
-                if save >= u8::MAX {
-                    return Err(PatError::SaveOverflow);
-                }
-                save += 1;
-                result.push(atom);
-            },
-            b'z' => {
-                if save >= u8::MAX {
-                    return Err(PatError::SaveOverflow);
-                }
-                result.push(Atom::Zero(save));
-                save += 1;
-            },
-
-
-            // Allow spaces as padding
-            b' ' | b'\n' | b'\r' | b'\t' => {},
-            // Everything else is illegal
-            _ => {
-                return Err(PatError::UnknownChar);
-            },
-        }
-        // Converted from str originally, should be safe
-        *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
-    }
-    // Check balanced stack operators
-    if depth != 0 {
-        return Err(PatError::StackError);
-    }
-    // Check if sub patterns are balanced
-    if subs.len() != 0 {
-        return Err(PatError::SubPattern);
-    }
-
-    // Remove redundant atoms at the end
-    fn is_redundant(atom: &Atom) -> bool {
-        match atom {
-            | Atom::Skip(_)
-            | Atom::Rangext(_)
-            | Atom::Pop
-            | Atom::Many(_) => true,
-            _ => false,
-        }
-    }
-    while result.last().map(is_redundant).unwrap_or(false) {
-        result.pop();
-    }
-
-    Ok(())
-}
\ No newline at end of file
diff --git a/sub/xpat/Cargo.toml b/sub/xpat/Cargo.toml
index 31457b4..cbc804e 100644
--- a/sub/xpat/Cargo.toml
+++ b/sub/xpat/Cargo.toml
@@ -3,6 +3,9 @@ name = "sub_xpat"
 version = "0.1.0"
 edition = "2021"
 
+[features]
+alloc = []
+
 [dependencies]
 sub_core.workspace = true
 sub_macros.workspace = true
\ No newline at end of file
diff --git a/sub/xpat/src/hexdump.rs b/sub/xpat/src/hexdump.rs
index e37381d..946ec63 100644
--- a/sub/xpat/src/hexdump.rs
+++ b/sub/xpat/src/hexdump.rs
@@ -6,13 +6,15 @@ const SEP: &str = " | ";
 
 pub struct HexDump<'s, T: Scannable + ?Sized, R: RangeBounds<usize>>(pub &'s T, pub R);
 
+#[allow(clippy::needless_lifetimes)]
 pub fn hex<
+    'a,
     T: Scannable + ?Sized,
     R: RangeBounds<usize>
 >(
-    data: &T,
+    data: &'a T,
     range:R
-) -> HexDump<T, R> {
+) -> HexDump<'a, T, R> {
     HexDump(data, range)
 }
 
diff --git a/sub/xpat/src/lib.rs b/sub/xpat/src/lib.rs
index 03a4ae0..642134d 100644
--- a/sub/xpat/src/lib.rs
+++ b/sub/xpat/src/lib.rs
@@ -8,6 +8,12 @@ pub mod scannable;
 pub mod scanner;
 pub mod hexdump;
 
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+#[cfg(feature = "alloc")]
+pub mod parser;
+
 //
 // Export Preludes:
 //
diff --git a/sub/xpat/src/parser.rs b/sub/xpat/src/parser.rs
new file mode 100644
index 0000000..2706600
--- /dev/null
+++ b/sub/xpat/src/parser.rs
@@ -0,0 +1,477 @@
+use core::{cmp, fmt, mem, str};
+use super::atoms::Atom;
+use alloc::vec::Vec;
+
+/// Special skip value meaning "use the platform pointer size instead"; emitted as `Atom::Push(PTR_SKIP)` when `{` follows `*`.
+pub(crate) const PTR_SKIP: u8 = 0;
+
+/// Pattern parsing error: the kind of failure plus a byte offset into the pattern string.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct ParsePatError {
+    kind: PatError, // category of the failure; its text comes from `PatError::to_str`
+    position: usize, // byte offset into the pattern, computed by `parse` from pointer difference
+}
+
+impl fmt::Display for ParsePatError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str()) // e.g. "Syntax Error @3: unknown character."
+    }
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum PatError { // internal failure kinds; the user-facing text lives in `to_str`
+    UnpairedHexDigit, // a hex digit (or `.`) was not followed by a valid second nibble
+    UnknownChar, // byte that is not part of the pattern syntax
+    ManyOverflow, // a `[..]` bound reached 16384
+    ManyRange, // `[lo-hi]` where lo is not strictly less than hi
+    ManyInvalid, // malformed `[..]`: no digits, unexpected byte, or input ended early
+    SaveOverflow, // save slot counter reached u8::MAX
+    StackError, // `}` without an open `{`, or `{` left unclosed
+    StackInvalid, // `{` not directly preceded by a jump atom (`%`, `$`, `*`)
+    UnclosedQuote, // `"` string literal never terminated
+    AlignedOperand, // `@` with a missing or invalid operand
+    CheckOperand, // `=` with a missing or invalid operand
+    ReadOperand, // `i`/`u` not followed by 1, 2 or 4
+    SubPattern, // `|` or `)` outside `(`, or `(` left unclosed
+    SubOverflow, // a case/break offset inside `( | )` reached 256 atoms
+    DoubleNibble // both nibbles of a byte were `.`
+}
+impl PatError {
+    fn to_str(self) -> &'static str { // static text for each kind, used by ParsePatError's Display impl
+        match self {
+            PatError::UnpairedHexDigit => "unpaired hex digit",
+            PatError::UnknownChar => "unknown character",
+            PatError::ManyOverflow => "many range exceeded",
+            PatError::ManyRange => "many bounds nonsensical",
+            PatError::ManyInvalid => "many invalid syntax",
+            PatError::SaveOverflow => "save store overflow",
+            PatError::StackError => "stack unbalanced",
+            PatError::StackInvalid => "stack must follow jump",
+            PatError::UnclosedQuote => "string missing end quote",
+            PatError::AlignedOperand => "aligned operand error",
+            PatError::CheckOperand => "check operand error", // was a copy of the aligned message
+            PatError::ReadOperand => "read operand error",
+            PatError::SubPattern => "sub pattern error",
+            PatError::SubOverflow => "sub pattern too large",
+            PatError::DoubleNibble => "unpaired nibble wildcard",
+        }
+    }
+}
+
+//----------------------------------------------------------------
+
+/// Pattern parser.
+///
+/// # Remarks
+///
+/// Following are examples of the pattern syntax.
+/// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
+///
+/// ```text
+/// 55 89 e5 83 ? ec
+/// ```
+///
+/// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
+///
+/// Note that a single question mark matches a whole byte. To wildcard only one nibble, write `.` in place of that hex digit (e.g. `4.` or `.f`); `..` is rejected.
+///
+/// Spaces (code point 32) are completely optional and carry no semantic meaning, their purpose is to visually group things together.
+///
+/// ```text
+/// b9 ' 37 13 00 00
+/// ```
+///
+/// Single quotes are used as bookmarks: they save the current cursor rva in the save array passed to the scanner.
+///
+/// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
+/// This power really comes to life with the capability to follow relative and absolute references.
+///
+/// The first entry in the save array is reserved for the rva where the pattern was matched.
+/// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
+///
+/// ```text
+/// b8 [16] 50 [13-42] ff
+/// ```
+///
+/// Pairs of decimal numbers separated by a hyphen in square brackets indicate the lower and upper bounds of the number of bytes to skip.
+/// The scanner is non greedy and considers the first match while skipping as little as possible.
+///
+/// A single decimal number in square brackets without a hyphen is a fixed-size skip, equivalent to writing that number of consecutive question marks.
+///
+/// ```text
+/// 31 c0 74 % ' c3
+/// e8 $ ' 31 c0 c3
+/// 68 * ' 31 c0 c3
+/// ```
+///
+/// These symbols are used to follow a signed 1 byte relative jump (`%`), a signed 4 byte relative jump (`$`) or an absolute pointer (`*`).
+///
+/// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
+///
+/// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
+///
+/// ```text
+/// b8 * "STRING" 00
+/// ```
+///
+/// String literals appear in double quotes and will be matched as UTF-8.
+///
+/// Escape sequences are not supported, switch back to matching with hex digits as needed.
+/// For UTF-16 support, you are welcome to send a PR.
+///
+/// ```text
+/// e8 $ { ' } 83 f0 5c c3
+/// ```
+///
+/// Curly braces must follow a jump symbol (see above).
+///
+/// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
+/// After the pattern successfully matched, the cursor returns to before the jump was followed.
+/// The bytes defining the jump are skipped and matching continues again from here.
+///
+/// ```text
+/// e8 $ @4
+/// ```
+///
+/// Checks that the cursor is aligned at this point in the scan.
+/// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
+///
+/// ```text
+/// e8 i1 a0 u4
+/// ```
+///
+/// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
+///
+/// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
+/// This means the values are sign- or zero- extended respectively before being stored.
+/// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
+///
+/// The cursor is advanced by the size of the operand.
+///
+/// ```text
+/// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
+/// ```
+///
+/// Parentheses indicate alternate subpatterns separated by a pipe character.
+///
+/// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
+pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
+    let mut result = Vec::with_capacity(pat.len() / 2); // initial capacity guess: half the input length in atoms
+    let mut pat_end = pat; // passed by `&mut` so parse_helper can update it
+    match parse_helper(&mut pat_end, &mut result) {
+        Ok(()) => Ok(result),
+        Err(kind) => {
+            let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize; // byte offset of `pat_end` within `pat`
+            Err(ParsePatError { kind, position })
+        },
+    }
+}
+// This is preferable but currently limited by macro rules...
+// pub use crate::pattern as parse;
+fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
+    result.push(Atom::Save(0));
+    let mut iter = pat.as_bytes().iter();
+    let mut save = 1;
+    let mut depth = 0;
+    #[derive(Default)]
+    struct SubPattern {
+        case: usize,
+        brks: Vec<usize>,
+        save: u8,
+        save_next: u8,
+        depth: u8,
+    }
+    let mut subs = Vec::<SubPattern>::new();
+    while let Some(mut chr) = iter.next().cloned() {
+        match chr {
+            // Follow signed 1 byte jump
+            b'%' => result.push(Atom::Jump1),
+            // Follow signed 4 byte jump
+            b'$' => result.push(Atom::Jump4),
+            // Follow pointer
+            b'*' => result.push(Atom::Ptr),
+            // Start recursive operator
+            b'{' => {
+                depth += 1;
+                // Must follow a jump operator and insert push before the jump
+                let atom = match result.last_mut() {
+                    Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
+                    Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
+                    Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
+                    _ => return Err(PatError::StackInvalid),
+                };
+                result.push(atom);
+            },
+            // End recursive operator
+            b'}' => {
+                // Unbalanced recursion
+                if depth <= 0 {
+                    return Err(PatError::StackError);
+                }
+                depth -= 1;
+                result.push(Atom::Pop);
+            },
+            // Start subpattern
+            b'(' => {
+                subs.push(SubPattern::default());
+                let sub = subs.last_mut().unwrap();
+                // Keep the save and depth state
+                sub.save = save;
+                sub.depth = depth;
+                // Add a new case, update the case offset later
+                sub.case = result.len();
+                result.push(Atom::Case(0));
+            },
+            // Case subpattern
+            b'|' => {
+                // Should already have started a subpattern
+                let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
+                // Update the save state
+                sub.save_next = cmp::max(sub.save_next, save);
+                save = sub.save;
+                depth = sub.depth;
+                // Add a break of the previous subpattern
+                sub.brks.push(result.len());
+                result.push(Atom::Break(0));
+                // Add a new case of the next subpattern
+                let case_offset = result.len() - sub.case - 1;
+                if case_offset >= 256 {
+                    return Err(PatError::SubOverflow);
+                }
+                result[sub.case] = Atom::Case(case_offset as u8);
+                sub.case = result.len();
+                result.push(Atom::Case(0));
+            },
+            // End subpattern
+            b')' => {
+                // Should already have started a subpattern
+                let sub = subs.pop().ok_or(PatError::SubPattern)?;
+                // Prepare for the next save
+                save = cmp::max(sub.save_next, save);
+                depth = sub.depth;
+                // Neutralize the last case, since there are no more
+                result[sub.case] = Atom::Nop;
+                // Fill in the breaks
+                for &brk in &sub.brks {
+                    let brk_offset = result.len() - brk - 1;
+                    if brk_offset >= 256 {
+                        return Err(PatError::SubOverflow);
+                    }
+                    result[brk] = Atom::Break(brk_offset as u8);
+                }
+            },
+            // Skip many operator
+            b'[' => {
+                // Parse the lower bound
+                let mut lower_bound = 0u32;
+                let mut at_least_one_char = false;
+                loop {
+                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
+                    match chr {
+                        b'-' | b']' => break,
+                        chr @ b'0'..=b'9' => {
+                            at_least_one_char = true;
+                            lower_bound = lower_bound * 10 + (chr - b'0') as u32;
+                            if lower_bound >= 16384 {
+                                return Err(PatError::ManyOverflow);
+                            }
+                        },
+                        _ => return Err(PatError::ManyInvalid),
+                    }
+                }
+                if !at_least_one_char {
+                    return Err(PatError::ManyInvalid);
+                }
+                // Turn the lower bound into skip ops
+                if lower_bound > 0 {
+                    if lower_bound >= 256 {
+                        result.push(Atom::Rangext((lower_bound >> 8) as u8));
+                    }
+                    result.push(Atom::Skip((lower_bound & 0xff) as u8));
+                }
+                // Second many part is optional
+                if chr == b']' {
+                    continue;
+                }
+                // Parse the upper bound
+                let mut upper_bound = 0u32;
+                loop {
+                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
+                    match chr {
+                        b']' => break,
+                        chr @ b'0'..=b'9' => {
+                            upper_bound = upper_bound * 10 + (chr - b'0') as u32;
+                            if upper_bound >= 16384 {
+                                return Err(PatError::ManyOverflow);
+                            }
+                        },
+                        _ => return Err(PatError::ManyInvalid),
+                    }
+                }
+                // Lower bound should be strictly less than the upper bound
+                if lower_bound < upper_bound {
+                    let many_skip = upper_bound - lower_bound;
+                    if many_skip >= 256 {
+                        result.push(Atom::Rangext((many_skip >> 8) as u8));
+                    }
+                    result.push(Atom::Many((many_skip & 0xff) as u8));
+                }
+                else {
+                    return Err(PatError::ManyRange);
+                }
+            },
+            // Match a byte
+            b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
+                let mut mask = 0xFF;
+
+                // High nibble of the byte
+                let hi = if chr == b'.' { mask &= 0x0F;0 }
+                else if chr >= b'a' { chr - b'a' + 10 }
+                else if chr >= b'A' { chr - b'A' + 10 }
+                else { chr - b'0' };
+
+                chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
+                // Low nibble of the byte
+                let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
+                else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
+                else if chr >= b'0' && chr <= b'9' { chr - b'0' }
+                else if chr == b'.' { mask &= 0xF0; 0 }
+                else { return Err(PatError::UnpairedHexDigit); };
+
+                if mask == 0 { return Err(PatError::DoubleNibble); };
+
+                // mask out nibble
+                if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
+
+                // Add byte to the pattern
+                result.push(Atom::Byte((hi << 4) + lo));
+            },
+            // Match raw bytes
+            b'"' => {
+                loop {
+                    if let Some(chr) = iter.next().cloned() {
+                        if chr != b'"' {
+                            result.push(Atom::Byte(chr));
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    else {
+                        return Err(PatError::UnclosedQuote);
+                    }
+                }
+            },
+            // Save the cursor
+            b'\'' => {
+                // 'Limited' save space
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                result.push(Atom::Save(save));
+                save += 1;
+            },
+            // Skip bytes
+            b'?' => {
+                // match result.last_mut() {
+                // 	Some(Atom::Skip(skip)) if *skip != PTR_SKIP && *skip < 127i8 => *skip += 1,
+                // 	_ => result.push(Atom::Skip(1)),
+                // };
+                // Coalesce adjacent skips together
+                if let Some(Atom::Skip(skip)) = result.last_mut() {
+                    if *skip != PTR_SKIP && *skip < 255u8 {
+                        *skip += 1;
+                        continue;
+                    }
+                }
+                result.push(Atom::Skip(1));
+            },
+
+            b'=' => {
+                let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
+                result.push( match op {
+                    b'0'..=b'9' => Atom::Check(op - b'0'),
+                    b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
+                    b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
+                    _ => return Err(PatError::CheckOperand)
+                });
+            },
+            b'@' => {
+                let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
+                result.push( match op {
+                    b'0'..=b'9' => Atom::Aligned(op - b'0'),
+                    b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
+                    b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
+                    _ => return Err(PatError::AlignedOperand)
+                });
+            },
+            b'i' => {
+                let atom = match iter.next().cloned() {
+                    Some(b'1') => Atom::ReadI8(save),
+                    Some(b'2') => Atom::ReadI16(save),
+                    Some(b'4') => Atom::ReadI32(save),
+                    _ => return Err(PatError::ReadOperand),
+                };
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                save += 1;
+                result.push(atom);
+            },
+            b'u' => {
+                let atom = match iter.next().cloned() {
+                    Some(b'1') => Atom::ReadU8(save),
+                    Some(b'2') => Atom::ReadU16(save),
+                    Some(b'4') => Atom::ReadU32(save),
+                    _ => return Err(PatError::ReadOperand),
+                };
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                save += 1;
+                result.push(atom);
+            },
+            b'z' => {
+                if save >= u8::MAX {
+                    return Err(PatError::SaveOverflow);
+                }
+                result.push(Atom::Zero(save));
+                save += 1;
+            },
+
+
+            // Allow spaces as padding
+            b' ' | b'\n' | b'\r' | b'\t' => {},
+            // Everything else is illegal
+            _ => {
+                return Err(PatError::UnknownChar);
+            },
+        }
+        // Converted from str originally, should be safe
+        *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
+    }
+    // Check balanced stack operators
+    if depth != 0 {
+        return Err(PatError::StackError);
+    }
+    // Check if sub patterns are balanced
+    if subs.len() != 0 {
+        return Err(PatError::SubPattern);
+    }
+
+    // Remove redundant atoms at the end
+    // Predicate used by the trailing cleanup loop below.
+    // Returns true for `Skip`, `Rangext`, `Pop` and `Many` atoms,
+    // and false for every other atom.
+    fn is_redundant(atom: &Atom) -> bool {
+        matches!(
+            atom,
+            Atom::Skip(_) | Atom::Rangext(_) | Atom::Pop | Atom::Many(_)
+        )
+    }
+    while result.last().map(is_redundant).unwrap_or(false) {
+        result.pop();
+    }
+
+    Ok(())
+}
\ No newline at end of file