commit 7c2ba320fa924619a68cd5af0533bb8abfa04426
Author: Jessie
Date:   Fri Jan 26 21:03:30 2024 -0500

    init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f037052
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "xrt"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..f6d1bc0
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,35 @@
+//! Minimal C runtime routines: `strlen`, `memcpy`, `memmove`, `memset` and `memcmp`.
+//!
+//! On x86_64 Windows the private `win64` module defines these symbols with
+//! `global_asm!`. On every other target that module is compiled out and this
+//! crate only declares the symbols.
+#![no_std]
+
+#[cfg(all(target_arch = "x86_64", target_os = "windows"))]
+mod win64 {
+    mod memcpy;
+    mod memset;
+    mod strlen;
+    mod memmove;
+    mod memcmp;
+}
+
+// The declarations are not referenced from inside this crate.
+#[allow(unused)]
+extern "system" {
+    /// Returns the number of bytes at `str` that precede the first zero byte.
+    pub fn strlen(str: *const u8) -> usize;
+    /// Copies `len` bytes from `src` to `dest`; the win64 routine returns the `dest`
+    /// address. It makes no provision for overlapping regions, use `memmove` for those.
+    pub fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> usize;
+    /// Copies `len` bytes from `src` to `dest`, where the regions may overlap; the
+    /// win64 routine returns the `dest` address.
+    pub fn memmove(dest: *mut u8, src: *const u8, len: usize) -> usize;
+    /// Stores `value` into the `len` bytes starting at `dest`; the win64 routine
+    /// returns the `dest` address.
+    pub fn memset(dest: *mut u8, value: u8, len: usize) -> usize;
+    /// Compares the first `len` bytes at `ptr1` and `ptr2`. The win64 routine returns
+    /// `0` when they match, otherwise `-1` or `1` when the first differing byte of
+    /// `ptr1` is respectively below or above that of `ptr2` (unsigned comparison).
+    pub fn memcmp(ptr1: *const u8, ptr2: *const u8, len: usize) -> i32;
+}
diff --git a/src/win64/memcmp.rs b/src/win64/memcmp.rs
new file mode 100644
index 0000000..8828519
--- /dev/null
+++ b/src/win64/memcmp.rs
@@ -0,0 +1,26 @@
+// memcmp(ptr1 = rcx, ptr2 = rdx, len = r8) -> eax
+// Compares byte by byte: 0 when all `len` bytes match, otherwise -1 or 1 from
+// the unsigned comparison of the first differing pair.
+// Labels carry a `memcmp` prefix so that they stay unique when several
+// `global_asm!` blocks are assembled together.
+core::arch::global_asm!{r#"
+.text
+.globl memcmp
+memcmp:
+    xor eax, eax
+    xor r9d, r9d
+.Lmemcmp_1:
+    cmp r8, r9
+    je .Lmemcmp_4
+    movzx r10d, byte ptr [rcx + r9]
+    movzx r11d, byte ptr [rdx + r9]
+    inc r9
+    cmp r10b, r11b
+    je .Lmemcmp_1
+    xor eax, eax
+    cmp r10b, r11b
+    sbb eax, eax
+    or eax, 1
+.Lmemcmp_4:
+    ret
+"#}
diff --git a/src/win64/memcpy.rs b/src/win64/memcpy.rs
new file mode 100644
index 0000000..32aadb6
--- /dev/null
+++ b/src/win64/memcpy.rs
@@ -0,0 +1,74 @@
+// https://github.com/nadavrot/memset_benchmark/blob/main/src/memcpy/impl.c
+//
+// memcpy(dest = rcx, src = rdx, len = r8) -> rax = dest
+// Lengths up to 32 bytes are handled per size class (1-4, 5-7, 8-16, 17-32)
+// with head and tail moves that may overlap each other. Longer copies run a
+// 32-byte loop and finish with an unaligned copy of the final 32 bytes.
+// Labels carry a `memcpy` prefix so that they stay unique when several
+// `global_asm!` blocks are assembled together.
+
+core::arch::global_asm!{r#"
+.text
+.globl memcpy
+memcpy:
+    mov rax, rcx
+    cmp r8, 4
+    ja .Lmemcpy_4
+    test r8, r8
+    je .Lmemcpy_13
+    movzx ecx, byte ptr [rdx]
+    mov byte ptr [rax], cl
+    movzx ecx, byte ptr [rdx + r8 - 1]
+    mov byte ptr [rax + r8 - 1], cl
+    cmp r8, 3
+    jb .Lmemcpy_13
+    movzx ecx, byte ptr [rdx + 1]
+    mov byte ptr [rax + 1], cl
+    movzx ecx, byte ptr [rdx + 2]
+    mov byte ptr [rax + 2], cl
+    ret
+.Lmemcpy_4:
+    cmp r8, 16
+    ja .Lmemcpy_8
+    cmp r8, 8
+    jb .Lmemcpy_7
+    mov rcx, qword ptr [rdx]
+    mov qword ptr [rax], rcx
+    mov rcx, qword ptr [rdx + r8 - 8]
+    mov qword ptr [rax + r8 - 8], rcx
+    ret
+.Lmemcpy_8:
+    cmp r8, 32
+    ja .Lmemcpy_10
+    movups xmm0, xmmword ptr [rdx]
+    movups xmmword ptr [rax], xmm0
+    movups xmm0, xmmword ptr [rdx + r8 - 16]
+    movups xmmword ptr [rax + r8 - 16], xmm0
+    ret
+.Lmemcpy_7:
+    mov ecx, dword ptr [rdx]
+    mov dword ptr [rax], ecx
+    mov ecx, dword ptr [rdx + r8 - 4]
+    mov dword ptr [rax + r8 - 4], ecx
+    ret
+.Lmemcpy_10:
+    lea rcx, [rax + r8]
+    add rcx, -32
+    mov r9, rdx
+    mov r10, rax
+.Lmemcpy_11:
+    movups xmm0, xmmword ptr [r9]
+    movups xmm1, xmmword ptr [r9 + 16]
+    movups xmmword ptr [r10 + 16], xmm1
+    movups xmmword ptr [r10], xmm0
+    add r10, 32
+    add r9, 32
+    cmp r10, rcx
+    jb .Lmemcpy_11
+    movups xmm0, xmmword ptr [rdx + r8 - 32]
+    movups xmm1, xmmword ptr [rdx + r8 - 16]
+    movups xmmword ptr [rcx + 16], xmm1
+    movups xmmword ptr [rcx], xmm0
+.Lmemcpy_13:
+    ret
+"#}
\ No newline at end of file
diff --git a/src/win64/memmove.rs b/src/win64/memmove.rs
new file mode 100644
index 0000000..a44c32b
--- /dev/null
+++ b/src/win64/memmove.rs
@@ -0,0 +1,192 @@
+// memmove(dest = rcx, src = rdx, len = r8) -> rax = dest
+// Requests whose regions do not overlap are forwarded to `memcpy`; overlapping
+// ones are copied back to front (dest above src) or front to back (dest below
+// src).
+// Those overlap tests and the back-to-front loops only look at the low 32 bits
+// of `len`, so any `len` of 2^31 or more is diverted to `.Lmemmove_huge`, which
+// picks the direction from the full 64-bit values and copies with `rep movsb`.
+// Labels carry a `memmove` prefix so that they stay unique when several
+// `global_asm!` blocks are assembled together.
+core::arch::global_asm!{r#"
+.text
+.globl memmove
+memmove:
+    push rsi
+    push rdi
+    sub rsp, 40
+    mov rsi, rcx
+    cmp rdx, rcx
+    je .Lmemmove_36
+    test r8, r8
+    je .Lmemmove_36
+    mov rax, r8
+    shr rax, 31
+    jne .Lmemmove_huge
+    mov rcx, rsi
+    sub rcx, rdx
+    jbe .Lmemmove_18
+    movsxd rax, r8d
+    cmp rcx, rax
+    jge .Lmemmove_18
+    mov eax, r8d
+    dec eax
+    js .Lmemmove_36
+    cmp r8d, 4
+    jb .Lmemmove_16
+    mov ecx, eax
+    lea r9, [rdx + rcx]
+    add rcx, rsi
+    sub r9, rcx
+    cmp r9, 16
+    jb .Lmemmove_16
+    xor ecx, ecx
+    cmp r8d, 16
+    jb .Lmemmove_13
+    mov ecx, r8d
+    and ecx, -16
+    mov r9d, eax
+    mov r10d, ecx
+.Lmemmove_9:
+    mov r11d, r9d
+    movups xmm0, xmmword ptr [rdx + r11 - 15]
+    movups xmmword ptr [rsi + r11 - 15], xmm0
+    add r9d, -16
+    add r10d, -16
+    jne .Lmemmove_9
+    cmp ecx, r8d
+    je .Lmemmove_36
+    test r8b, 12
+    je .Lmemmove_12
+.Lmemmove_13:
+    mov r9d, r8d
+    and r9d, -4
+    sub eax, r9d
+    mov r10d, ecx
+    sub r10d, r9d
+    not ecx
+    add ecx, r8d
+.Lmemmove_14:
+    mov r11d, ecx
+    mov edi, dword ptr [rdx + r11 - 3]
+    mov dword ptr [rsi + r11 - 3], edi
+    add ecx, -4
+    add r10d, 4
+    jne .Lmemmove_14
+    cmp r9d, r8d
+    je .Lmemmove_36
+    jmp .Lmemmove_16
+.Lmemmove_18:
+    mov rax, rdx
+    sub rax, rsi
+    jbe .Lmemmove_35
+    movsxd r9, r8d
+    cmp rax, r9
+    jge .Lmemmove_35
+    xor eax, eax
+    cmp r8, 8
+    jb .Lmemmove_31
+    cmp rcx, 32
+    jb .Lmemmove_31
+    cmp r8, 32
+    jae .Lmemmove_24
+    xor eax, eax
+    jmp .Lmemmove_28
+.Lmemmove_35:
+    mov rcx, rsi
+    call "memcpy"
+.Lmemmove_36:
+    mov rax, rsi
+    add rsp, 40
+    pop rdi
+    pop rsi
+    ret
+.Lmemmove_24:
+    mov rax, r8
+    and rax, -32
+    xor ecx, ecx
+.Lmemmove_25:
+    movups xmm0, xmmword ptr [rdx + rcx]
+    movups xmm1, xmmword ptr [rdx + rcx + 16]
+    movups xmmword ptr [rsi + rcx], xmm0
+    movups xmmword ptr [rsi + rcx + 16], xmm1
+    add rcx, 32
+    cmp rax, rcx
+    jne .Lmemmove_25
+    cmp rax, r8
+    je .Lmemmove_36
+    test r8b, 24
+    je .Lmemmove_31
+.Lmemmove_28:
+    mov rcx, rax
+    mov rax, r8
+    and rax, -8
+.Lmemmove_29:
+    mov r9, qword ptr [rdx + rcx]
+    mov qword ptr [rsi + rcx], r9
+    add rcx, 8
+    cmp rax, rcx
+    jne .Lmemmove_29
+    cmp rax, r8
+    je .Lmemmove_36
+.Lmemmove_31:
+    mov rcx, rax
+    not rcx
+    add rcx, r8
+    mov r9, r8
+    and r9, 3
+    je .Lmemmove_33
+.Lmemmove_32:
+    movzx r10d, byte ptr [rdx + rax]
+    mov byte ptr [rsi + rax], r10b
+    inc rax
+    dec r9
+    jne .Lmemmove_32
+.Lmemmove_33:
+    cmp rcx, 3
+    jb .Lmemmove_36
+.Lmemmove_34:
+    movzx ecx, byte ptr [rdx + rax]
+    mov byte ptr [rsi + rax], cl
+    movzx ecx, byte ptr [rdx + rax + 1]
+    mov byte ptr [rsi + rax + 1], cl
+    movzx ecx, byte ptr [rdx + rax + 2]
+    mov byte ptr [rsi + rax + 2], cl
+    movzx ecx, byte ptr [rdx + rax + 3]
+    mov byte ptr [rsi + rax + 3], cl
+    add rax, 4
+    cmp r8, rax
+    jne .Lmemmove_34
+    jmp .Lmemmove_36
+.Lmemmove_12:
+    sub eax, ecx
+.Lmemmove_16:
+    mov eax, eax
+.Lmemmove_17:
+    movzx ecx, byte ptr [rdx + rax]
+    mov byte ptr [rsi + rax], cl
+    dec rax
+    lea ecx, [rax + 1]
+    test ecx, ecx
+    jg .Lmemmove_17
+    jmp .Lmemmove_36
+.Lmemmove_huge:
+    mov r9, rsi
+    mov rdi, rsi
+    mov rsi, rdx
+    mov rcx, r8
+    mov rax, rdi
+    sub rax, rsi
+    cmp rax, r8
+    jae .Lmemmove_huge_fwd
+    lea rsi, [rsi + r8 - 1]
+    lea rdi, [rdi + r8 - 1]
+    std
+    rep movsb
+    cld
+    jmp .Lmemmove_huge_done
+.Lmemmove_huge_fwd:
+    rep movsb
+.Lmemmove_huge_done:
+    mov rsi, r9
+    jmp .Lmemmove_36
+"#}
diff --git a/src/win64/memset.rs b/src/win64/memset.rs
new file mode 100644
index 0000000..a36b987
--- /dev/null
+++ b/src/win64/memset.rs
@@ -0,0 +1,138 @@
+// https://github.com/nadavrot/memset_benchmark/blob/main/src/memset/impl.c
+//
+// memset(dest = rcx, value = dl, len = r8) -> rax = dest
+// Lengths up to 31 bytes are handled per size class with head and tail stores
+// that may overlap each other. Longer fills broadcast the byte into xmm0; from
+// 161 bytes on the body is written with 32-byte-aligned stores, and every long
+// fill finishes with an unaligned store of the final 32 bytes.
+// Labels carry a `memset` prefix so that they stay unique when several
+// `global_asm!` blocks are assembled together.
+
+core::arch::global_asm!{r#"
+.text
+.globl memset
+memset:
+    mov rax, rcx
+    cmp r8, 31
+    ja .Lmemset_10
+    cmp r8, 4
+    ja .Lmemset_5
+    test r8, r8
+    je .Lmemset_26
+    mov byte ptr [rax], dl
+    mov byte ptr [r8 + rax - 1], dl
+    cmp r8, 3
+    jb .Lmemset_26
+    mov byte ptr [rax + 1], dl
+    mov byte ptr [rax + 2], dl
+    ret
+.Lmemset_10:
+    movzx ecx, dl
+    movd xmm0, ecx
+    punpcklbw xmm0, xmm0
+    pshuflw xmm0, xmm0, 0
+    pshufd xmm0, xmm0, 0
+    cmp r8, 161
+    jb .Lmemset_23
+    movdqu xmmword ptr [rax], xmm0
+    movdqu xmmword ptr [rax + 16], xmm0
+    mov rdx, rax
+    and rdx, -32
+    lea r9, [rax + r8]
+    lea rcx, [rax + r8]
+    add rcx, -32
+    lea r8, [rdx + 192]
+    cmp r8, r9
+    jae .Lmemset_12
+.Lmemset_13:
+    movdqa xmmword ptr [r8 - 160], xmm0
+    movdqa xmmword ptr [r8 - 144], xmm0
+    movdqa xmmword ptr [r8 - 128], xmm0
+    movdqa xmmword ptr [r8 - 112], xmm0
+    movdqa xmmword ptr [r8 - 96], xmm0
+    movdqa xmmword ptr [r8 - 80], xmm0
+    movdqa xmmword ptr [r8 - 64], xmm0
+    movdqa xmmword ptr [r8 - 48], xmm0
+    movdqa xmmword ptr [r8 - 32], xmm0
+    movdqa xmmword ptr [r8 - 16], xmm0
+    add r8, 160
+    cmp r8, r9
+    jb .Lmemset_13
+    add r8, -160
+    mov rdx, r8
+    cmp rdx, rcx
+    jb .Lmemset_16
+    jmp .Lmemset_17
+.Lmemset_5:
+    cmp r8, 16
+    ja .Lmemset_9
+    movzx edx, dl
+    movabs rcx, 72340172838076673
+    imul rcx, rdx
+    cmp r8, 8
+    jb .Lmemset_8
+    mov qword ptr [rax], rcx
+    mov qword ptr [rax + r8 - 8], rcx
+    ret
+.Lmemset_23:
+    lea rcx, [rax + r8]
+    add rcx, -32
+    mov rdx, rax
+.Lmemset_24:
+    movdqu xmmword ptr [rdx], xmm0
+    movdqu xmmword ptr [rdx + 16], xmm0
+    add rdx, 32
+    cmp rdx, rcx
+    jb .Lmemset_24
+    jmp .Lmemset_25
+.Lmemset_12:
+    add rdx, 32
+    cmp rdx, rcx
+    jae .Lmemset_17
+.Lmemset_16:
+    movdqa xmmword ptr [rdx], xmm0
+    movdqa xmmword ptr [rdx + 16], xmm0
+    add rdx, 32
+.Lmemset_17:
+    cmp rdx, rcx
+    jb .Lmemset_18
+    cmp rdx, rcx
+    jb .Lmemset_20
+.Lmemset_21:
+    cmp rdx, rcx
+    jae .Lmemset_25
+.Lmemset_22:
+    movdqa xmmword ptr [rdx], xmm0
+    movdqa xmmword ptr [rdx + 16], xmm0
+.Lmemset_25:
+    movdqu xmmword ptr [rcx], xmm0
+    movdqu xmmword ptr [rcx + 16], xmm0
+.Lmemset_26:
+    ret
+.Lmemset_9:
+    movzx ecx, dl
+    movd xmm0, ecx
+    punpcklbw xmm0, xmm0
+    pshuflw xmm0, xmm0, 0
+    pshufd xmm0, xmm0, 0
+    movdqu xmmword ptr [rax + r8 - 16], xmm0
+    movdqu xmmword ptr [rax], xmm0
+    ret
+.Lmemset_18:
+    movdqa xmmword ptr [rdx], xmm0
+    movdqa xmmword ptr [rdx + 16], xmm0
+    add rdx, 32
+    cmp rdx, rcx
+    jae .Lmemset_21
+.Lmemset_20:
+    movdqa xmmword ptr [rdx], xmm0
+    movdqa xmmword ptr [rdx + 16], xmm0
+    add rdx, 32
+    cmp rdx, rcx
+    jb .Lmemset_22
+    jmp .Lmemset_25
+.Lmemset_8:
+    mov dword ptr [rax], ecx
+    mov dword ptr [rax + r8 - 4], ecx
+    ret
+"#}
\ No newline at end of file
diff --git a/src/win64/strlen.rs b/src/win64/strlen.rs
new file mode 100644
index 0000000..7f20be9
--- /dev/null
+++ b/src/win64/strlen.rs
@@ -0,0 +1,18 @@
+// strlen(str = rcx) -> rax
+// Scans forward for the zero byte with `repne scasb` while rcx counts down from
+// -1; the length is then -2 - rcx.
+core::arch::global_asm!{r#"
+.text
+.globl strlen
+strlen:
+    push rdi
+    mov rdi, rcx
+    xor rax, rax
+    mov rcx, -1
+    cld
+    repne scasb
+    mov rax, -2
+    sub rax, rcx
+    pop rdi
+    ret
+"#}
diff --git a/tests/memcmp.rs b/tests/memcmp.rs
new file mode 100644
index 0000000..1f63944
--- /dev/null
+++ b/tests/memcmp.rs
@@ -0,0 +1,16 @@
+
+
+#[test]
+pub fn test_memcmp() {
+
+    let buffer1: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let buffer2: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let buffer3: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 6];
+    let buffer4: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 8];
+
+    assert_eq!( 0, unsafe {xrt::memcmp(buffer1.as_ptr(), buffer2.as_ptr(), 8)});
+    assert_eq!( 1, unsafe {xrt::memcmp(buffer1.as_ptr(), buffer3.as_ptr(), 8)}); // >0 - 7 has a greater value than 6
+    assert_eq!(-1, unsafe {xrt::memcmp(buffer1.as_ptr(), buffer4.as_ptr(), 8)}); // <0 - 7 has a lower value than 8
+
+}
+
diff --git a/tests/memcpy.rs b/tests/memcpy.rs
new file mode 100644
index 0000000..cb87bb3
--- /dev/null
+++ b/tests/memcpy.rs
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+#[test]
+pub fn test_memcpy() {
+    let mut buffer1: Vec<u8> = Vec::with_capacity(0x1000);
+    let mut buffer2: Vec<u8> = vec![0; 0x1000];
+
+
+    // fill buffer1 with _random_ bytes
+    let mut seed = 0x696969u64;
+    for _ in 0..0x200 {
+        buffer1.extend(seed.to_le_bytes());
+        seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1)
+    }
+
+    // both buffers must hold exactly 0x1000 initialised bytes before the copy
+    assert_eq!(buffer2.len(), buffer1.len());
+
+    // copy buffer1 to buffer2
+    unsafe { xrt::memcpy(buffer2.as_mut_ptr(), buffer1.as_ptr(), 0x1000) };
+
+    assert_eq!(buffer1, buffer2);
+
+}
+
diff --git a/tests/memmove.rs b/tests/memmove.rs
new file mode 100644
index 0000000..9636b13
--- /dev/null
+++ b/tests/memmove.rs
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+#[test]
+pub fn test_memmove() {
+    let mut buffer: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let expected: [u8;8] = [4, 5, 6, 7, 4, 5, 6, 7];
+    unsafe { xrt::memmove(buffer.as_mut_ptr(), buffer[4..].as_ptr(), 4); }
+    assert_eq!(buffer, expected);
+
+    let mut buffer: [u8;8] = [0, 1, 2, 3, 4, 5, 6, 7];
+    let expected: [u8;8] = [0, 0, 1, 2, 3, 4, 5, 6];
+    unsafe { xrt::memmove(buffer[1..].as_mut_ptr(), buffer.as_ptr(), 7); }
+    assert_eq!(buffer, expected);
+}
+
diff --git a/tests/memset.rs b/tests/memset.rs
new file mode 100644
index 0000000..dcbfa1e
--- /dev/null
+++ b/tests/memset.rs
@@ -0,0 +1,23 @@
+
+
+#[test]
+pub fn test_memset() {
+
+    let mut buffer1 = [0u8; 10];
+    let mut buffer2 = [0u8; 12];
+    let mut buffer3 = [0u8; 2];
+    let mut buffer4 = [0u8; 8];
+
+    unsafe { xrt::memset(buffer1.as_mut_ptr(), 1, buffer1.len()) };
+    unsafe { xrt::memset(buffer3.as_mut_ptr(), 3, buffer3.len()) };
+    unsafe { xrt::memset(buffer4.as_mut_ptr(), 4, buffer4.len()) };
+    unsafe { xrt::memset(buffer3.as_mut_ptr(), 3, buffer3.len()) };
+    unsafe { xrt::memset(buffer2.as_mut_ptr(), 2, buffer2.len()) };
+
+    assert_eq!(None, buffer1.iter().filter(|&&a|a!=1).next());
+    assert_eq!(None, buffer2.iter().filter(|&&a|a!=2).next());
+    assert_eq!(None, buffer3.iter().filter(|&&a|a!=3).next());
+    assert_eq!(None, buffer4.iter().filter(|&&a|a!=4).next());
+
+}
+
diff --git a/tests/strlen.rs b/tests/strlen.rs
new file mode 100644
index 0000000..8413cb5
--- /dev/null
+++ b/tests/strlen.rs
@@ -0,0 +1,11 @@
+
+
+#[test]
+pub fn test_strlen() {
+    let mut buffer: Vec<u8> = "a".repeat(1000).into();
+    buffer.push(0);
+    assert_eq!(unsafe { xrt::strlen(buffer.as_ptr()) }, 1000);
+    assert_eq!(unsafe { xrt::strlen((&buffer[1..]).as_ptr()) }, 999);
+    assert_eq!(unsafe { xrt::strlen((&buffer[8..]).as_ptr()) }, 992);
+}
+