expose parser.rs via a hack

2025-11-28 02:12:33 -05:00
parent 9d0e3cbc3a
commit 8b8b506319
6 changed files with 503 additions and 479 deletions
--- a/sub/_macros/src/lib.rs
+++ b/sub/_macros/src/lib.rs
@@ -1,5 +1,8 @@
 #![allow(unused)]
 // xpat's parser requires this
 extern crate alloc;
 mod from_repr;
 mod patterns;
--- a/sub/_macros/src/patterns.rs
+++ b/sub/_macros/src/patterns.rs
@@ -1,6 +1,15 @@
 use core::{cmp, fmt, mem, str};
 use proc_macro::{Literal, TokenStream, TokenTree};
 mod atoms {
    include!("../../xpat/src/atoms.rs");
 }
 mod parser {
    include!("../../xpat/src/parser.rs");
 }
 /// Compile time pattern parser.
 ///
 /// ```ignore
@@ -14,7 +23,7 @@ pub fn proc_pattern(input: TokenStream) -> TokenStream {
        _e => panic!("expected a single string literal to parse, got: {_e:?}"),
    };
-    let pattern = match parse(&string) {
+    let pattern = match parser::parse(&string) {
        Ok(pattern) => pattern,
        Err(err) => panic!("invalid pattern syntax: {}", err),
    };
@@ -52,479 +61,3 @@ fn parse_str_literal(input: &Literal) -> String {
    }
    string
 }
 /// Special skip value to indicate to use platform pointer size instead.
 pub(crate) const PTR_SKIP: u8 = 0;
 /// Pattern parsing error.
 ///
 /// Pairs the reason the parse failed with the byte offset into the pattern
 /// string that the parser had reached when it gave up.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub struct ParsePatError {
    // What went wrong.
    kind: PatError,
    // Byte offset into the pattern text.
    position: usize,
 }
 impl fmt::Display for ParsePatError {
    /// Renders the error as `Syntax Error @<position>: <reason>.`
    fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
        let Self { kind, position } = *self;
        let reason = kind.to_str();
        write!(out, "Syntax Error @{}: {}.", position, reason)
    }
 }
 /// Reason a pattern failed to parse.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 enum PatError {
    /// A hex digit (or `.`) was not followed by a second hex digit or `.`.
    UnpairedHexDigit,
    /// A character that is not part of the pattern syntax.
    UnknownChar,
    /// A bound inside `[...]` is too large.
    ManyOverflow,
    /// The lower bound inside `[...]` is not strictly less than the upper bound.
    ManyRange,
    /// Malformed `[...]` syntax.
    ManyInvalid,
    /// Too many save slots were requested.
    SaveOverflow,
    /// Unbalanced `{` / `}`.
    StackError,
    /// `{` does not directly follow a jump symbol.
    StackInvalid,
    /// A `"` string literal is never closed.
    UnclosedQuote,
    /// `@` has a missing or invalid operand.
    AlignedOperand,
    /// `=` has a missing or invalid operand.
    CheckOperand,
    /// `i` / `u` is not followed by a valid operand size.
    ReadOperand,
    /// `|` or `)` without a matching `(`, or an unclosed `(`.
    SubPattern,
    /// An alternative inside `( ... )` is too long to encode.
    SubOverflow,
    /// Both nibbles of a byte are the `.` wildcard.
    DoubleNibble,
 }
 impl PatError {
    /// Returns the short, human readable description of this error kind.
    fn to_str(self) -> &'static str {
        match self {
            PatError::UnpairedHexDigit => "unpaired hex digit",
            PatError::UnknownChar => "unknown character",
            PatError::ManyOverflow => "many range exceeded",
            PatError::ManyRange => "many bounds nonsensical",
            PatError::ManyInvalid => "many invalid syntax",
            PatError::SaveOverflow => "save store overflow",
            PatError::StackError => "stack unbalanced",
            PatError::StackInvalid => "stack must follow jump",
            PatError::UnclosedQuote => "string missing end quote",
            PatError::AlignedOperand => "aligned operand error",
            // Previously reported the `@` (aligned) message by mistake.
            PatError::CheckOperand => "check operand error",
            PatError::ReadOperand => "read operand error",
            PatError::SubPattern => "sub pattern error",
            PatError::SubOverflow => "sub pattern too large",
            PatError::DoubleNibble => "unpaired nibble wildcard",
        }
    }
 }
 //----------------------------------------------------------------
 include!("../../xpat/src/atoms.rs");
 /// Pattern parser.
 ///
 /// # Remarks
 ///
 /// Following are examples of the pattern syntax.
 /// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
 ///
 /// ```text
 /// 55 89 e5 83 ? ec
 /// ```
 ///
 /// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
 ///
 /// Note that a single question mark matches a whole byte. To mask a single nibble instead, write `.` in place of that hex digit (e.g. `4.` or `.f`); a byte written as `..` is rejected.
 ///
 /// Spaces (code point 32) are completely optional and carry no semantic meaning, their purpose is to visually group things together.
 ///
 /// ```text
 /// b9 ' 37 13 00 00
 /// ```
 ///
 /// Single quotes are used as bookmarks, to save the current cursor rva in the save array passed to the scanner.
 ///
 /// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
 /// This power really comes to life with the capability to follow relative and absolute references.
 ///
 /// The first entry in the save array is reserved for the rva where the pattern was matched.
 /// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
 ///
 /// ```text
 /// b8 [16] 50 [13-42] ff
 /// ```
 ///
 /// Pairs of decimal numbers separated by a hyphen in square brackets indicate the lower and upper bound of the number of bytes to skip.
 /// The scanner is non greedy and considers the first match while skipping as little as possible.
 ///
 /// A single decimal number in square brackets without a hyphen is a fixed size jump, equivalent to writing that number of consecutive question marks.
 ///
 /// ```text
 /// 31 c0 74 % ' c3
 /// e8 $ ' 31 c0 c3
 /// 68 * ' 31 c0 c3
 /// ```
 ///
 /// These symbols are used to follow; a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
 ///
 /// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
 ///
 /// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
 ///
 /// ```text
 /// b8 * "STRING" 00
 /// ```
 ///
 /// String literals appear in double quotes and will be matched as UTF-8.
 ///
 /// Escape sequences are not supported, switch back to matching with hex digits as needed.
 /// For UTF-16 support, you are welcome to send a PR.
 ///
 /// ```text
 /// e8 $ { ' } 83 f0 5c c3
 /// ```
 ///
 /// Curly braces must follow a jump symbol (see above).
 ///
 /// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
 /// After the pattern successfully matched, the cursor returns to before the jump was followed.
 /// The bytes defining the jump are skipped and matching continues again from here.
 ///
 /// ```text
 /// e8 $ @4
 /// ```
 ///
 /// Checks that the cursor is aligned at this point in the scan.
 /// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
 ///
 /// ```text
 /// e8 i1 a0 u4
 /// ```
 ///
 /// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
 ///
 /// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
 /// This means the values are sign- or zero- extended respectively before being stored.
 /// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
 ///
 /// The cursor is advanced by the size of the operand.
 ///
 /// ```text
 /// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
 /// ```
 ///
 /// Parentheses indicate alternate subpatterns separated by a pipe character.
 ///
 /// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
 pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
    // Initial capacity guess: half as many atoms as input bytes.
    let mut atoms = Vec::with_capacity(pat.len() / 2);
    // The helper moves this cursor forward through `pat` as it consumes input.
    let mut remaining = pat;
    parse_helper(&mut remaining, &mut atoms)
        .map(|()| atoms)
        .map_err(|kind| {
            // The cursor is a sub-slice of `pat`, so the pointer distance is the byte offset.
            let position = remaining.as_ptr() as usize - pat.as_ptr() as usize;
            ParsePatError { kind, position }
        })
 }
 // This is preferable but currently limited by macro rules...
 // pub use crate::pattern as parse;
 /// Parses the pattern text in `*pat` and appends the resulting atoms to `result`.
 ///
 /// `result` always starts with `Atom::Save(0)`. Redundant trailing atoms are removed
 /// once the whole input has been consumed successfully.
 ///
 /// `*pat` is used as an output cursor: before every token it is set to the part of the
 /// input that has not been consumed yet. When an error is returned it therefore points
 /// at the start of the offending token; on success it is the empty tail of the input.
 ///
 /// # Errors
 ///
 /// Returns the `PatError` describing the first syntax problem encountered.
 fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
    result.push(Atom::Save(0));
    let source: &str = *pat;
    let mut iter = source.as_bytes().iter();
    let mut save = 1;
    let mut depth = 0;
    // Bookkeeping for one `( ... | ... )` group.
    #[derive(Default)]
    struct SubPattern {
        // Index of the most recent `Case` atom, patched once its length is known.
        case: usize,
        // Indices of the `Break` atoms to patch when the group is closed.
        brks: Vec<usize>,
        // Save slot counter at the start of the group, restored for every alternative.
        save: u8,
        // Highest save slot counter reached by any alternative so far.
        save_next: u8,
        // Recursion depth at the start of the group.
        depth: u8,
    }
    let mut subs = Vec::<SubPattern>::new();
    loop {
        // Record the cursor before every token. Doing it here rather than at the bottom
        // of the loop keeps it up to date on the `continue` paths below as well.
        // Every token ends on an ASCII byte, so this is always a char boundary.
        *pat = &source[source.len() - iter.as_slice().len()..];
        let mut chr = match iter.next() {
            Some(&byte) => byte,
            None => break,
        };
        match chr {
            // Follow signed 1 byte jump
            b'%' => result.push(Atom::Jump1),
            // Follow signed 4 byte jump
            b'$' => result.push(Atom::Jump4),
            // Follow pointer
            b'*' => result.push(Atom::Ptr),
            // Start recursive operator
            b'{' => {
                depth += 1;
                // Must follow a jump operator and insert push before the jump
                let atom = match result.last_mut() {
                    Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
                    Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
                    Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
                    _ => return Err(PatError::StackInvalid),
                };
                result.push(atom);
            },
            // End recursive operator
            b'}' => {
                // Unbalanced recursion
                if depth == 0 {
                    return Err(PatError::StackError);
                }
                depth -= 1;
                result.push(Atom::Pop);
            },
            // Start subpattern
            b'(' => {
                subs.push(SubPattern::default());
                let sub = subs.last_mut().unwrap();
                // Keep the save and depth state
                sub.save = save;
                sub.depth = depth;
                // Add a new case, update the case offset later
                sub.case = result.len();
                result.push(Atom::Case(0));
            },
            // Case subpattern
            b'|' => {
                // Should already have started a subpattern
                let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
                // Update the save state
                sub.save_next = cmp::max(sub.save_next, save);
                save = sub.save;
                depth = sub.depth;
                // Add a break of the previous subpattern
                sub.brks.push(result.len());
                result.push(Atom::Break(0));
                // Add a new case of the next subpattern
                let case_offset = result.len() - sub.case - 1;
                if case_offset >= 256 {
                    return Err(PatError::SubOverflow);
                }
                result[sub.case] = Atom::Case(case_offset as u8);
                sub.case = result.len();
                result.push(Atom::Case(0));
            },
            // End subpattern
            b')' => {
                // Should already have started a subpattern
                let sub = subs.pop().ok_or(PatError::SubPattern)?;
                // Prepare for the next save
                save = cmp::max(sub.save_next, save);
                depth = sub.depth;
                // Neutralize the last case, since there are no more
                result[sub.case] = Atom::Nop;
                // Fill in the breaks
                for &brk in &sub.brks {
                    let brk_offset = result.len() - brk - 1;
                    if brk_offset >= 256 {
                        return Err(PatError::SubOverflow);
                    }
                    result[brk] = Atom::Break(brk_offset as u8);
                }
            },
            // Skip many operator
            b'[' => {
                // Parse the lower bound
                let mut lower_bound = 0u32;
                let mut at_least_one_char = false;
                loop {
                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
                    match chr {
                        b'-' | b']' => break,
                        chr @ b'0'..=b'9' => {
                            at_least_one_char = true;
                            lower_bound = lower_bound * 10 + (chr - b'0') as u32;
                            if lower_bound >= 16384 {
                                return Err(PatError::ManyOverflow);
                            }
                        },
                        _ => return Err(PatError::ManyInvalid),
                    }
                }
                if !at_least_one_char {
                    return Err(PatError::ManyInvalid);
                }
                // Turn the lower bound into skip ops
                if lower_bound > 0 {
                    if lower_bound >= 256 {
                        result.push(Atom::Rangext((lower_bound >> 8) as u8));
                    }
                    result.push(Atom::Skip((lower_bound & 0xff) as u8));
                }
                // Second many part is optional
                if chr == b']' {
                    continue;
                }
                // Parse the upper bound
                let mut upper_bound = 0u32;
                loop {
                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
                    match chr {
                        b']' => break,
                        chr @ b'0'..=b'9' => {
                            upper_bound = upper_bound * 10 + (chr - b'0') as u32;
                            if upper_bound >= 16384 {
                                return Err(PatError::ManyOverflow);
                            }
                        },
                        _ => return Err(PatError::ManyInvalid),
                    }
                }
                // Lower bound should be strictly less than the upper bound
                if lower_bound < upper_bound {
                    let many_skip = upper_bound - lower_bound;
                    if many_skip >= 256 {
                        result.push(Atom::Rangext((many_skip >> 8) as u8));
                    }
                    result.push(Atom::Many((many_skip & 0xff) as u8));
                }
                else {
                    return Err(PatError::ManyRange);
                }
            },
            // Match a byte
            b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
                let mut mask = 0xFF;
                // High nibble of the byte; the match arm already restricts `chr` to hex digits or `.`
                let hi = if chr == b'.' { mask &= 0x0F; 0 }
                else if chr >= b'a' { chr - b'a' + 10 }
                else if chr >= b'A' { chr - b'A' + 10 }
                else { chr - b'0' };
                chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
                // Low nibble of the byte
                let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
                else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
                else if chr >= b'0' && chr <= b'9' { chr - b'0' }
                else if chr == b'.' { mask &= 0xF0; 0 }
                else { return Err(PatError::UnpairedHexDigit); };
                // Both nibbles were wildcards
                if mask == 0 { return Err(PatError::DoubleNibble); };
                // mask out nibble
                if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
                // Add byte to the pattern
                result.push(Atom::Byte((hi << 4) + lo));
            },
            // Match raw bytes
            b'"' => {
                loop {
                    if let Some(chr) = iter.next().cloned() {
                        if chr != b'"' {
                            result.push(Atom::Byte(chr));
                        }
                        else {
                            break;
                        }
                    }
                    else {
                        return Err(PatError::UnclosedQuote);
                    }
                }
            },
            // Save the cursor
            b'\'' => {
                // 'Limited' save space
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                result.push(Atom::Save(save));
                save += 1;
            },
            // Skip bytes
            b'?' => {
                // Coalesce consecutive skips into a single atom
                if let Some(Atom::Skip(skip)) = result.last_mut() {
                    if *skip != PTR_SKIP && *skip < 255u8 {
                        *skip += 1;
                        continue;
                    }
                }
                result.push(Atom::Skip(1));
            },
            // Check operator, takes a single alphanumeric operand
            b'=' => {
                let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
                result.push( match op {
                    b'0'..=b'9' => Atom::Check(op - b'0'),
                    b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
                    b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
                    _ => return Err(PatError::CheckOperand)
                });
            },
            // Aligned operator, takes a single alphanumeric operand
            b'@' => {
                let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
                result.push( match op {
                    b'0'..=b'9' => Atom::Aligned(op - b'0'),
                    b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
                    b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
                    _ => return Err(PatError::AlignedOperand)
                });
            },
            // Signed read into the next save slot
            b'i' => {
                let atom = match iter.next().cloned() {
                    Some(b'1') => Atom::ReadI8(save),
                    Some(b'2') => Atom::ReadI16(save),
                    Some(b'4') => Atom::ReadI32(save),
                    _ => return Err(PatError::ReadOperand),
                };
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                save += 1;
                result.push(atom);
            },
            // Unsigned read into the next save slot
            b'u' => {
                let atom = match iter.next().cloned() {
                    Some(b'1') => Atom::ReadU8(save),
                    Some(b'2') => Atom::ReadU16(save),
                    Some(b'4') => Atom::ReadU32(save),
                    _ => return Err(PatError::ReadOperand),
                };
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                save += 1;
                result.push(atom);
            },
            // Emit a `Zero` atom for the next save slot
            b'z' => {
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                result.push(Atom::Zero(save));
                save += 1;
            },
            // Allow spaces as padding
            b' ' | b'\n' | b'\r' | b'\t' => {},
            // Everything else is illegal
            _ => {
                return Err(PatError::UnknownChar);
            },
        }
    }
    // Check balanced stack operators
    if depth != 0 {
        return Err(PatError::StackError);
    }
    // Check if sub patterns are balanced
    if !subs.is_empty() {
        return Err(PatError::SubPattern);
    }
    // Remove redundant atoms at the end
    fn is_redundant(atom: &Atom) -> bool {
        matches!(atom, Atom::Skip(_) | Atom::Rangext(_) | Atom::Pop | Atom::Many(_))
    }
    while result.last().map_or(false, is_redundant) {
        result.pop();
    }
    Ok(())
 }
--- a/sub/xpat/Cargo.toml
+++ b/sub/xpat/Cargo.toml
@@ -3,6 +3,9 @@ name = "sub_xpat"
 version = "0.1.0"
 edition = "2021"
 [features]
 alloc = []
 [dependencies]
 sub_core.workspace = true
 sub_macros.workspace = true
--- a/sub/xpat/src/hexdump.rs
+++ b/sub/xpat/src/hexdump.rs
@@ -6,13 +6,15 @@ const SEP: &str = " | ";
 pub struct HexDump<'s, T: Scannable + ?Sized, R: RangeBounds<usize>>(pub &'s T, pub R);
 #[allow(clippy::needless_lifetimes)]
 pub fn hex<
    'a,
    T: Scannable + ?Sized,
    R: RangeBounds<usize>
 >(
-    data: &T,
+    data: &'a T,
    range:R
-) -> HexDump<T, R> {
+) -> HexDump<'a, T, R> {
    HexDump(data, range)
 }
--- a/sub/xpat/src/lib.rs
+++ b/sub/xpat/src/lib.rs
@@ -8,6 +8,12 @@ pub mod scannable;
 pub mod scanner;
 pub mod hexdump;
 #[cfg(feature = "alloc")]
 extern crate alloc;
 #[cfg(feature = "alloc")]
 pub mod parser;
 //
 // Export Preludes:
 //
--- a/sub/xpat/src/parser.rs
+++ b/sub/xpat/src/parser.rs
@@ -0,0 +1,477 @@
 use core::{cmp, fmt, mem, str};
 use super::atoms::Atom;
 use alloc::vec::Vec;
 /// Special skip value to indicate to use platform pointer size instead.
 pub(crate) const PTR_SKIP: u8 = 0;
 /// Pattern parsing error.
 ///
 /// Pairs the reason the parse failed with the byte offset into the pattern
 /// string that the parser had reached when it gave up.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub struct ParsePatError {
    // What went wrong.
    kind: PatError,
    // Byte offset into the pattern text.
    position: usize,
 }
 impl fmt::Display for ParsePatError {
    /// Renders the error as `Syntax Error @<position>: <reason>.`
    fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
        let Self { kind, position } = *self;
        let reason = kind.to_str();
        write!(out, "Syntax Error @{}: {}.", position, reason)
    }
 }
 /// Reason a pattern failed to parse.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 enum PatError {
    /// A hex digit (or `.`) was not followed by a second hex digit or `.`.
    UnpairedHexDigit,
    /// A character that is not part of the pattern syntax.
    UnknownChar,
    /// A bound inside `[...]` is too large.
    ManyOverflow,
    /// The lower bound inside `[...]` is not strictly less than the upper bound.
    ManyRange,
    /// Malformed `[...]` syntax.
    ManyInvalid,
    /// Too many save slots were requested.
    SaveOverflow,
    /// Unbalanced `{` / `}`.
    StackError,
    /// `{` does not directly follow a jump symbol.
    StackInvalid,
    /// A `"` string literal is never closed.
    UnclosedQuote,
    /// `@` has a missing or invalid operand.
    AlignedOperand,
    /// `=` has a missing or invalid operand.
    CheckOperand,
    /// `i` / `u` is not followed by a valid operand size.
    ReadOperand,
    /// `|` or `)` without a matching `(`, or an unclosed `(`.
    SubPattern,
    /// An alternative inside `( ... )` is too long to encode.
    SubOverflow,
    /// Both nibbles of a byte are the `.` wildcard.
    DoubleNibble,
 }
 impl PatError {
    /// Returns the short, human readable description of this error kind.
    fn to_str(self) -> &'static str {
        match self {
            PatError::UnpairedHexDigit => "unpaired hex digit",
            PatError::UnknownChar => "unknown character",
            PatError::ManyOverflow => "many range exceeded",
            PatError::ManyRange => "many bounds nonsensical",
            PatError::ManyInvalid => "many invalid syntax",
            PatError::SaveOverflow => "save store overflow",
            PatError::StackError => "stack unbalanced",
            PatError::StackInvalid => "stack must follow jump",
            PatError::UnclosedQuote => "string missing end quote",
            PatError::AlignedOperand => "aligned operand error",
            // Previously reported the `@` (aligned) message by mistake.
            PatError::CheckOperand => "check operand error",
            PatError::ReadOperand => "read operand error",
            PatError::SubPattern => "sub pattern error",
            PatError::SubOverflow => "sub pattern too large",
            PatError::DoubleNibble => "unpaired nibble wildcard",
        }
    }
 }
 //----------------------------------------------------------------
 /// Pattern parser.
 ///
 /// # Remarks
 ///
 /// Following are examples of the pattern syntax.
 /// The syntax takes inspiration from [YARA hexadecimal strings](https://yara.readthedocs.io/en/v3.7.0/writingrules.html#hexadecimal-strings).
 ///
 /// ```text
 /// 55 89 e5 83 ? ec
 /// ```
 ///
 /// Case insensitive hexadecimal characters match the exact byte pattern and question marks serve as placeholders for unknown bytes.
 ///
 /// Note that a single question mark matches a whole byte. To mask a single nibble instead, write `.` in place of that hex digit (e.g. `4.` or `.f`); a byte written as `..` is rejected.
 ///
 /// Spaces (code point 32) are completely optional and carry no semantic meaning, their purpose is to visually group things together.
 ///
 /// ```text
 /// b9 ' 37 13 00 00
 /// ```
 ///
 /// Single quotes are used as bookmarks, to save the current cursor rva in the save array passed to the scanner.
 ///
 /// It is no longer necessary to do tedious address calculations to read information out of the byte stream after a match was found.
 /// This power really comes to life with the capability to follow relative and absolute references.
 ///
 /// The first entry in the save array is reserved for the rva where the pattern was matched.
 /// The rest of the save array is filled in order of appearance of the quotes. Here the rva of the quote can be found in `save[1]`.
 ///
 /// ```text
 /// b8 [16] 50 [13-42] ff
 /// ```
 ///
 /// Pairs of decimal numbers separated by a hyphen in square brackets indicate the lower and upper bound of the number of bytes to skip.
 /// The scanner is non greedy and considers the first match while skipping as little as possible.
 ///
 /// A single decimal number in square brackets without a hyphen is a fixed size jump, equivalent to writing that number of consecutive question marks.
 ///
 /// ```text
 /// 31 c0 74 % ' c3
 /// e8 $ ' 31 c0 c3
 /// 68 * ' 31 c0 c3
 /// ```
 ///
 /// These symbols are used to follow; a signed 1 byte relative jump: `%`, a signed 4 byte relative jump: `$` and an absolute pointer: `*`.
 ///
 /// They are designed to be able to have the scanner follow short jumps, calls and longer jumps, and absolute pointers.
 ///
 /// Composes really well with bookmarks to find the addresses of referenced functions and other data without tedious address calculations.
 ///
 /// ```text
 /// b8 * "STRING" 00
 /// ```
 ///
 /// String literals appear in double quotes and will be matched as UTF-8.
 ///
 /// Escape sequences are not supported, switch back to matching with hex digits as needed.
 /// For UTF-16 support, you are welcome to send a PR.
 ///
 /// ```text
 /// e8 $ { ' } 83 f0 5c c3
 /// ```
 ///
 /// Curly braces must follow a jump symbol (see above).
 ///
 /// The sub pattern enclosed within the curly braces is matched at the destination after following the jump.
 /// After the pattern successfully matched, the cursor returns to before the jump was followed.
 /// The bytes defining the jump are skipped and matching continues again from here.
 ///
 /// ```text
 /// e8 $ @4
 /// ```
 ///
 /// Checks that the cursor is aligned at this point in the scan.
 /// The align value is `(1 << arg)`, in this example the cursor is checked to be aligned to 16.
 ///
 /// ```text
 /// e8 i1 a0 u4
 /// ```
 ///
 /// An `i` or `u` indicates memory read operations followed by the size of the operand to read.
 ///
 /// The read values are stored in the save array alongside the bookmarked addresses (single quotes).
 /// This means the values are sign- or zero- extended respectively before being stored.
 /// Operand sizes are 1 (byte), 2 (word) or 4 (dword).
 ///
 /// The cursor is advanced by the size of the operand.
 ///
 /// ```text
 /// 83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8
 /// ```
 ///
 /// Parentheses indicate alternate subpatterns separated by a pipe character.
 ///
 /// The scanner attempts to match the alternate subpatterns from left to right and fails if none of them match.
 pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
    // Initial capacity guess: half as many atoms as input bytes.
    let mut atoms = Vec::with_capacity(pat.len() / 2);
    // The helper moves this cursor forward through `pat` as it consumes input.
    let mut remaining = pat;
    parse_helper(&mut remaining, &mut atoms)
        .map(|()| atoms)
        .map_err(|kind| {
            // The cursor is a sub-slice of `pat`, so the pointer distance is the byte offset.
            let position = remaining.as_ptr() as usize - pat.as_ptr() as usize;
            ParsePatError { kind, position }
        })
 }
 // This is preferable but currently limited by macro rules...
 // pub use crate::pattern as parse;
 fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
    result.push(Atom::Save(0));
    let mut iter = pat.as_bytes().iter();
    let mut save = 1;
    let mut depth = 0;
    #[derive(Default)]
    struct SubPattern {
        case: usize,
        brks: Vec<usize>,
        save: u8,
        save_next: u8,
        depth: u8,
    }
    let mut subs = Vec::<SubPattern>::new();
    while let Some(mut chr) = iter.next().cloned() {
        match chr {
            // Follow signed 1 byte jump
            b'%' => result.push(Atom::Jump1),
            // Follow signed 4 byte jump
            b'$' => result.push(Atom::Jump4),
            // Follow pointer
            b'*' => result.push(Atom::Ptr),
            // Start recursive operator
            b'{' => {
                depth += 1;
                // Must follow a jump operator and insert push before the jump
                let atom = match result.last_mut() {
                    Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
                    Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
                    Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
                    _ => return Err(PatError::StackInvalid),
                };
                result.push(atom);
            },
            // End recursive operator
            b'}' => {
                // Unbalanced recursion
                if depth <= 0 {
                    return Err(PatError::StackError);
                }
                depth -= 1;
                result.push(Atom::Pop);
            },
            // Start subpattern
            b'(' => {
                subs.push(SubPattern::default());
                let sub = subs.last_mut().unwrap();
                // Keep the save and depth state
                sub.save = save;
                sub.depth = depth;
                // Add a new case, update the case offset later
                sub.case = result.len();
                result.push(Atom::Case(0));
            },
            // Case subpattern
            b'|' => {
                // Should already have started a subpattern
                let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
                // Update the save state
                sub.save_next = cmp::max(sub.save_next, save);
                save = sub.save;
                depth = sub.depth;
                // Add a break of the previous subpattern
                sub.brks.push(result.len());
                result.push(Atom::Break(0));
                // Add a new case of the next subpattern
                let case_offset = result.len() - sub.case - 1;
                if case_offset >= 256 {
                    return Err(PatError::SubOverflow);
                }
                result[sub.case] = Atom::Case(case_offset as u8);
                sub.case = result.len();
                result.push(Atom::Case(0));
            },
            // End subpattern
            b')' => {
                // Should already have started a subpattern
                let sub = subs.pop().ok_or(PatError::SubPattern)?;
                // Prepare for the next save
                save = cmp::max(sub.save_next, save);
                depth = sub.depth;
                // Neutralize the last case, since there are no more
                result[sub.case] = Atom::Nop;
                // Fill in the breaks
                for &brk in &sub.brks {
                    let brk_offset = result.len() - brk - 1;
                    if brk_offset >= 256 {
                        return Err(PatError::SubOverflow);
                    }
                    result[brk] = Atom::Break(brk_offset as u8);
                }
            },
            // Skip many operator
            b'[' => {
                // Parse the lower bound
                let mut lower_bound = 0u32;
                let mut at_least_one_char = false;
                loop {
                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
                    match chr {
                        b'-' | b']' => break,
                        chr @ b'0'..=b'9' => {
                            at_least_one_char = true;
                            lower_bound = lower_bound * 10 + (chr - b'0') as u32;
                            if lower_bound >= 16384 {
                                return Err(PatError::ManyOverflow);
                            }
                        },
                        _ => return Err(PatError::ManyInvalid),
                    }
                }
                if !at_least_one_char {
                    return Err(PatError::ManyInvalid);
                }
                // Turn the lower bound into skip ops
                if lower_bound > 0 {
                    if lower_bound >= 256 {
                        result.push(Atom::Rangext((lower_bound >> 8) as u8));
                    }
                    result.push(Atom::Skip((lower_bound & 0xff) as u8));
                }
                // Second many part is optional
                if chr == b']' {
                    continue;
                }
                // Parse the upper bound
                let mut upper_bound = 0u32;
                loop {
                    chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
                    match chr {
                        b']' => break,
                        chr @ b'0'..=b'9' => {
                            upper_bound = upper_bound * 10 + (chr - b'0') as u32;
                            if upper_bound >= 16384 {
                                return Err(PatError::ManyOverflow);
                            }
                        },
                        _ => return Err(PatError::ManyInvalid),
                    }
                }
                // Lower bound should be strictly less than the upper bound
                if lower_bound < upper_bound {
                    let many_skip = upper_bound - lower_bound;
                    if many_skip >= 256 {
                        result.push(Atom::Rangext((many_skip >> 8) as u8));
                    }
                    result.push(Atom::Many((many_skip & 0xff) as u8));
                }
                else {
                    return Err(PatError::ManyRange);
                }
            },
            // Match a byte
            b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
                let mut mask = 0xFF;
                // High nibble of the byte
                let hi = if chr == b'.' { mask &= 0x0F;0 }
                else if chr >= b'a' { chr - b'a' + 10 }
                else if chr >= b'A' { chr - b'A' + 10 }
                else { chr - b'0' };
                chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
                // Low nibble of the byte
                let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
                else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
                else if chr >= b'0' && chr <= b'9' { chr - b'0' }
                else if chr == b'.' { mask &= 0xF0; 0 }
                else { return Err(PatError::UnpairedHexDigit); };
                if mask == 0 { return Err(PatError::DoubleNibble); };
                // mask out nibble
                if mask != 0xFF { result.push(Atom::Fuzzy(mask)) }
                // Add byte to the pattern
                result.push(Atom::Byte((hi << 4) + lo));
            },
            // Match raw bytes
            b'"' => {
                loop {
                    if let Some(chr) = iter.next().cloned() {
                        if chr != b'"' {
                            result.push(Atom::Byte(chr));
                        }
                        else {
                            break;
                        }
                    }
                    else {
                        return Err(PatError::UnclosedQuote);
                    }
                }
            },
            // Save the cursor
            b'\'' => {
                // 'Limited' save space
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                result.push(Atom::Save(save));
                save += 1;
            },
            // Skip bytes
            b'?' => {
                // match result.last_mut() {
                // 	Some(Atom::Skip(skip)) if *skip != PTR_SKIP && *skip < 127i8 => *skip += 1,
                // 	_ => result.push(Atom::Skip(1)),
                // };
                // Coalesce consecutive skips into a single skip atom
                if let Some(Atom::Skip(skip)) = result.last_mut() {
                    if *skip != PTR_SKIP && *skip < 255u8 {
                        *skip += 1;
                        continue;
                    }
                }
                result.push(Atom::Skip(1));
            },
            b'=' => {
                let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
                result.push( match op {
                    b'0'..=b'9' => Atom::Check(op - b'0'),
                    b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
                    b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
                    _ => return Err(PatError::CheckOperand)
                });
            },
            b'@' => {
                let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
                result.push( match op {
                    b'0'..=b'9' => Atom::Aligned(op - b'0'),
                    b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
                    b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
                    _ => return Err(PatError::AlignedOperand)
                });
            },
            b'i' => {
                let atom = match iter.next().cloned() {
                    Some(b'1') => Atom::ReadI8(save),
                    Some(b'2') => Atom::ReadI16(save),
                    Some(b'4') => Atom::ReadI32(save),
                    _ => return Err(PatError::ReadOperand),
                };
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                save += 1;
                result.push(atom);
            },
            b'u' => {
                let atom = match iter.next().cloned() {
                    Some(b'1') => Atom::ReadU8(save),
                    Some(b'2') => Atom::ReadU16(save),
                    Some(b'4') => Atom::ReadU32(save),
                    _ => return Err(PatError::ReadOperand),
                };
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                save += 1;
                result.push(atom);
            },
            b'z' => {
                if save >= u8::MAX {
                    return Err(PatError::SaveOverflow);
                }
                result.push(Atom::Zero(save));
                save += 1;
            },
            // Allow spaces as padding
            b' ' | b'\n' | b'\r' | b'\t' => {},
            // Everything else is illegal
            _ => {
                return Err(PatError::UnknownChar);
            },
        }
        // Converted from str originally, should be safe
        *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
    }
    // Check balanced stack operators
    if depth != 0 {
        return Err(PatError::StackError);
    }
    // Check if sub patterns are balanced
    if subs.len() != 0 {
        return Err(PatError::SubPattern);
    }
    // Remove redundant atoms at the end
    /// Reports whether `atom` is one of the kinds that is stripped when it
    /// trails the parsed pattern.
    ///
    /// Only `Skip`, `Rangext`, `Pop` and `Many` are considered redundant;
    /// every other atom yields `false`.
    fn is_redundant(atom: &Atom) -> bool {
        matches!(
            atom,
            Atom::Skip(_) | Atom::Rangext(_) | Atom::Pop | Atom::Many(_)
        )
    }
    while result.last().map(is_redundant).unwrap_or(false) {
        result.pop();
    }
    Ok(())
 }