This commit is contained in:
Jessie
2024-01-26 21:03:30 -05:00
commit 7c2ba320fa
13 changed files with 547 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

8
Cargo.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "xrt"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

41
src/lib.rs Normal file
View File

@@ -0,0 +1,41 @@
#![no_std]
// Assembly implementations of the routines declared in the `extern` block
// below, one submodule per routine. Each submodule file consists of a single
// `core::arch::global_asm!` block that defines the global symbol of the same
// name. The code reads its arguments from rcx/rdx/r8, so it is only compiled
// for x86_64 Windows targets.
#[cfg(all(target_arch = "x86_64", target_os = "windows"))]
mod win64 {
mod memcpy;
mod memset;
mod strlen;
mod memmove;
mod memcmp;
}
// Raw declarations of the memory/string routines this crate exposes.
//
// On x86_64 Windows the symbols are defined by the assembly in `win64`; on any
// other target they are left for the linker to resolve.
// NOTE(review): `memcpy`, `memmove` and `memset` are declared as returning
// `usize`; the win64 assembly returns the destination address in `rax`, so the
// value is that address as an integer. Confirm this is the intended API.
#[allow(unused)]
extern "system" {
    /// Compares the first `len` bytes at `ptr1` and `ptr2`.
    ///
    /// The win64 implementation returns `0` when they are equal, `-1` when the
    /// first differing byte of `ptr1` is lower (unsigned) and `1` otherwise.
    ///
    /// # Safety
    /// Both pointers must be valid for reads of `len` bytes.
    pub fn memcmp(ptr1: *const u8, ptr2: *const u8, len: usize) -> i32;

    /// Copies `len` bytes from `src` to `dest`.
    ///
    /// # Safety
    /// `src` must be valid for reads and `dest` for writes of `len` bytes, and
    /// the two ranges must not overlap (use [`memmove`] for that).
    pub fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> usize;

    /// Copies `len` bytes from `src` to `dest`; the ranges may overlap.
    ///
    /// # Safety
    /// `src` must be valid for reads and `dest` for writes of `len` bytes.
    pub fn memmove(dest: *mut u8, src: *const u8, len: usize) -> usize;

    /// Fills `len` bytes starting at `dest` with `value`.
    ///
    /// # Safety
    /// `dest` must be valid for writes of `len` bytes.
    pub fn memset(dest: *mut u8, value: u8, len: usize) -> usize;

    /// Returns the number of bytes before the first zero byte at `str`.
    ///
    /// # Safety
    /// `str` must point to a readable, zero-terminated byte sequence.
    pub fn strlen(str: *const u8) -> usize;
}

20
src/win64/memcmp.rs Normal file
View File

@@ -0,0 +1,20 @@
// memcmp(ptr1 = rcx, ptr2 = rdx, len = r8) -> eax
//
// Byte-by-byte comparison. Returns 0 when the first `len` bytes are equal,
// -1 when the first differing byte of ptr1 is lower (unsigned) than the one of
// ptr2, and 1 otherwise. Only volatile registers (rax, r9, r10, r11) are used.
//
// Labels carry a function-specific prefix so they cannot collide with labels of
// any other assembly that ends up in the same object file.
core::arch::global_asm! {r#"
    .text
    .globl memcmp
memcmp:
    // eax = result (0 so far), r9 = index of the next byte to compare
    xor eax, eax
    xor r9d, r9d
.Lxrt_memcmp_next:
    cmp r8, r9
    je .Lxrt_memcmp_done
    movzx r10d, byte ptr [rcx + r9]
    movzx r11d, byte ptr [rdx + r9]
    inc r9
    cmp r10b, r11b
    je .Lxrt_memcmp_next
    // The flags of the compare above are still live here:
    // CF = 1 when the ptr1 byte is below the ptr2 byte.
    // sbb turns that into 0 / -1, and `or 1` maps it to 1 / -1.
    sbb eax, eax
    or eax, 1
.Lxrt_memcmp_done:
    ret
"#}

67
src/win64/memcpy.rs Normal file
View File

@@ -0,0 +1,67 @@
// memcpy(dest = rcx, src = rdx, len = r8) -> rax = dest
//
// Derived from https://github.com/nadavrot/memset_benchmark/blob/main/src/memcpy/impl.c
//
// The copy is dispatched on the length:
//   0        nothing to do
//   1..=4    individual bytes (first, last, then bytes 1 and 2 when len >= 3)
//   5..=7    two possibly overlapping dwords (first 4 / last 4 bytes)
//   8..=16   two possibly overlapping qwords (first 8 / last 8 bytes)
//   17..=32  two possibly overlapping 16-byte moves
//   > 32     32 bytes per iteration, then the last 32 bytes once more
// The source and destination ranges must not overlap.
//
// Labels carry a function-specific prefix so they cannot collide with labels of
// any other assembly that ends up in the same object file.
core::arch::global_asm! {r#"
    .text
    .globl memcpy
memcpy:
    mov rax, rcx
    cmp r8, 4
    ja .Lxrt_memcpy_le16
    test r8, r8
    je .Lxrt_memcpy_ret
    // 1..=4 bytes
    movzx ecx, byte ptr [rdx]
    mov byte ptr [rax], cl
    movzx ecx, byte ptr [rdx + r8 - 1]
    mov byte ptr [rax + r8 - 1], cl
    cmp r8, 3
    jb .Lxrt_memcpy_ret
    movzx ecx, byte ptr [rdx + 1]
    mov byte ptr [rax + 1], cl
    movzx ecx, byte ptr [rdx + 2]
    mov byte ptr [rax + 2], cl
    ret
.Lxrt_memcpy_le16:
    cmp r8, 16
    ja .Lxrt_memcpy_le32
    cmp r8, 8
    jb .Lxrt_memcpy_dwords
    // 8..=16 bytes
    mov rcx, qword ptr [rdx]
    mov qword ptr [rax], rcx
    mov rcx, qword ptr [rdx + r8 - 8]
    mov qword ptr [rax + r8 - 8], rcx
    ret
.Lxrt_memcpy_le32:
    cmp r8, 32
    ja .Lxrt_memcpy_large
    // 17..=32 bytes
    movups xmm0, xmmword ptr [rdx]
    movups xmmword ptr [rax], xmm0
    movups xmm0, xmmword ptr [rdx + r8 - 16]
    movups xmmword ptr [rax + r8 - 16], xmm0
    ret
.Lxrt_memcpy_dwords:
    // 5..=7 bytes
    mov ecx, dword ptr [rdx]
    mov dword ptr [rax], ecx
    mov ecx, dword ptr [rdx + r8 - 4]
    mov dword ptr [rax + r8 - 4], ecx
    ret
.Lxrt_memcpy_large:
    // rcx = dest + len - 32: start of the final 32-byte chunk
    lea rcx, [rax + r8]
    add rcx, -32
    mov r9, rdx
    mov r10, rax
.Lxrt_memcpy_loop:
    movups xmm0, xmmword ptr [r9]
    movups xmm1, xmmword ptr [r9 + 16]
    movups xmmword ptr [r10 + 16], xmm1
    movups xmmword ptr [r10], xmm0
    add r10, 32
    add r9, 32
    cmp r10, rcx
    jb .Lxrt_memcpy_loop
    // final (possibly overlapping) 32 bytes
    movups xmm0, xmmword ptr [rdx + r8 - 32]
    movups xmm1, xmmword ptr [rdx + r8 - 16]
    movups xmmword ptr [rcx + 16], xmm1
    movups xmmword ptr [rcx], xmm0
.Lxrt_memcpy_ret:
    ret
"#}

159
src/win64/memmove.rs Normal file
View File

@@ -0,0 +1,159 @@
// memmove(dest = rcx, src = rdx, len = r8) -> rax = dest
//
// Overlap-safe copy. All offsets and lengths are handled as full 64-bit
// unsigned quantities, so the overlap test is correct for any `len`.
//
//   len == 0 or dest == src        -> nothing to do
//   src < dest < src + len         -> copy from the highest address downwards
//   dest < src < dest + len        -> copy from the lowest address upwards
//   otherwise (disjoint ranges)    -> tail-jump to `memcpy`, which receives the
//                                     untouched rcx/rdx/r8 and returns dest in rax
//
// Both overlapping paths move 16 bytes at a time (load first, then store) and
// finish with single bytes. Only volatile registers (rax, r9, r10, xmm0) are
// used, so no stack frame is needed.
//
// Labels carry a function-specific prefix so they cannot collide with labels of
// any other assembly that ends up in the same object file.
core::arch::global_asm! {r#"
    .text
    .globl memmove
memmove:
    mov rax, rcx
    test r8, r8
    je .Lxrt_memmove_done
    // r9 = dest - src (mod 2^64); zero means both pointers are identical
    mov r9, rcx
    sub r9, rdx
    je .Lxrt_memmove_done
    cmp r9, r8
    jb .Lxrt_memmove_backward
    // r9 = src - dest (mod 2^64)
    neg r9
    cmp r9, r8
    jb .Lxrt_memmove_forward
    jmp memcpy

.Lxrt_memmove_forward:
    // r9 = number of bytes already copied (also the next offset)
    xor r9d, r9d
.Lxrt_memmove_fwd_xmm:
    mov r10, r8
    sub r10, r9
    cmp r10, 16
    jb .Lxrt_memmove_fwd_byte
    movups xmm0, xmmword ptr [rdx + r9]
    movups xmmword ptr [rcx + r9], xmm0
    add r9, 16
    jmp .Lxrt_memmove_fwd_xmm
.Lxrt_memmove_fwd_byte:
    cmp r9, r8
    je .Lxrt_memmove_done
    movzx r10d, byte ptr [rdx + r9]
    mov byte ptr [rcx + r9], r10b
    inc r9
    jmp .Lxrt_memmove_fwd_byte

.Lxrt_memmove_backward:
    // r9 = number of bytes still to copy; bytes [r9, len) are already done
    mov r9, r8
.Lxrt_memmove_bwd_xmm:
    cmp r9, 16
    jb .Lxrt_memmove_bwd_byte
    sub r9, 16
    movups xmm0, xmmword ptr [rdx + r9]
    movups xmmword ptr [rcx + r9], xmm0
    jmp .Lxrt_memmove_bwd_xmm
.Lxrt_memmove_bwd_byte:
    test r9, r9
    je .Lxrt_memmove_done
    dec r9
    movzx r10d, byte ptr [rdx + r9]
    mov byte ptr [rcx + r9], r10b
    jmp .Lxrt_memmove_bwd_byte

.Lxrt_memmove_done:
    ret
"#}

130
src/win64/memset.rs Normal file
View File

@@ -0,0 +1,130 @@
// memset(dest = rcx, value = dl, len = r8) -> rax = dest
//
// Derived from https://github.com/nadavrot/memset_benchmark/blob/main/src/memset/impl.c
//
// The fill is dispatched on the length:
//   0         nothing to do
//   1..=4     individual bytes
//   5..=7     two possibly overlapping dwords
//   8..=16    two possibly overlapping qwords
//   17..=31   two possibly overlapping 16-byte stores
//   32..=160  unaligned 32-byte stores, then the last 32 bytes once more
//   >= 161    unaligned 32-byte head, aligned stores (160 bytes per iteration,
//             then up to four 32-byte steps), unaligned 32-byte tail
//
// Labels carry a function-specific prefix so they cannot collide with labels of
// any other assembly that ends up in the same object file.
core::arch::global_asm! {r#"
    .text
    .globl memset
memset:
    mov rax, rcx
    cmp r8, 31
    ja .Lxrt_memset_10
    cmp r8, 4
    ja .Lxrt_memset_5
    test r8, r8
    je .Lxrt_memset_26
    // 1..=4 bytes
    mov byte ptr [rax], dl
    mov byte ptr [r8 + rax - 1], dl
    cmp r8, 3
    jb .Lxrt_memset_26
    mov byte ptr [rax + 1], dl
    mov byte ptr [rax + 2], dl
    ret
.Lxrt_memset_10:
    // >= 32 bytes: broadcast the fill byte to all 16 lanes of xmm0
    movzx ecx, dl
    movd xmm0, ecx
    punpcklbw xmm0, xmm0
    pshuflw xmm0, xmm0, 0
    pshufd xmm0, xmm0, 0
    cmp r8, 161
    jb .Lxrt_memset_23
    // >= 161 bytes: unaligned head, then 32-byte aligned stores.
    // rdx = dest rounded down to 32, r9 = end, rcx = end - 32
    movdqu xmmword ptr [rax], xmm0
    movdqu xmmword ptr [rax + 16], xmm0
    mov rdx, rax
    and rdx, -32
    lea r9, [rax + r8]
    lea rcx, [rax + r8]
    add rcx, -32
    lea r8, [rdx + 192]
    cmp r8, r9
    jae .Lxrt_memset_12
.Lxrt_memset_13:
    // 160 bytes per iteration, ending at r8
    movdqa xmmword ptr [r8 - 160], xmm0
    movdqa xmmword ptr [r8 - 144], xmm0
    movdqa xmmword ptr [r8 - 128], xmm0
    movdqa xmmword ptr [r8 - 112], xmm0
    movdqa xmmword ptr [r8 - 96], xmm0
    movdqa xmmword ptr [r8 - 80], xmm0
    movdqa xmmword ptr [r8 - 64], xmm0
    movdqa xmmword ptr [r8 - 48], xmm0
    movdqa xmmword ptr [r8 - 32], xmm0
    movdqa xmmword ptr [r8 - 16], xmm0
    add r8, 160
    cmp r8, r9
    jb .Lxrt_memset_13
    add r8, -160
    mov rdx, r8
    cmp rdx, rcx
    jb .Lxrt_memset_16
    jmp .Lxrt_memset_17
.Lxrt_memset_5:
    cmp r8, 16
    ja .Lxrt_memset_9
    // 5..=16 bytes: rcx = fill byte replicated into all 8 bytes
    // (72340172838076673 == 0x0101010101010101)
    movzx edx, dl
    movabs rcx, 72340172838076673
    imul rcx, rdx
    cmp r8, 8
    jb .Lxrt_memset_8
    mov qword ptr [rax], rcx
    mov qword ptr [rax + r8 - 8], rcx
    ret
.Lxrt_memset_23:
    // 32..=160 bytes: rcx = dest + len - 32
    lea rcx, [rax + r8]
    add rcx, -32
    mov rdx, rax
.Lxrt_memset_24:
    movdqu xmmword ptr [rdx], xmm0
    movdqu xmmword ptr [rdx + 16], xmm0
    add rdx, 32
    cmp rdx, rcx
    jb .Lxrt_memset_24
    jmp .Lxrt_memset_25
.Lxrt_memset_12:
    add rdx, 32
    cmp rdx, rcx
    jae .Lxrt_memset_17
.Lxrt_memset_16:
    movdqa xmmword ptr [rdx], xmm0
    movdqa xmmword ptr [rdx + 16], xmm0
    add rdx, 32
.Lxrt_memset_17:
    cmp rdx, rcx
    jb .Lxrt_memset_18
    // rdx >= rcx was just established, so this second test is never taken
    cmp rdx, rcx
    jb .Lxrt_memset_20
.Lxrt_memset_21:
    cmp rdx, rcx
    jae .Lxrt_memset_25
.Lxrt_memset_22:
    movdqa xmmword ptr [rdx], xmm0
    movdqa xmmword ptr [rdx + 16], xmm0
.Lxrt_memset_25:
    // final (possibly overlapping, unaligned) 32 bytes
    movdqu xmmword ptr [rcx], xmm0
    movdqu xmmword ptr [rcx + 16], xmm0
.Lxrt_memset_26:
    ret
.Lxrt_memset_9:
    // 17..=31 bytes
    movzx ecx, dl
    movd xmm0, ecx
    punpcklbw xmm0, xmm0
    pshuflw xmm0, xmm0, 0
    pshufd xmm0, xmm0, 0
    movdqu xmmword ptr [rax + r8 - 16], xmm0
    movdqu xmmword ptr [rax], xmm0
    ret
.Lxrt_memset_18:
    movdqa xmmword ptr [rdx], xmm0
    movdqa xmmword ptr [rdx + 16], xmm0
    add rdx, 32
    cmp rdx, rcx
    jae .Lxrt_memset_21
.Lxrt_memset_20:
    movdqa xmmword ptr [rdx], xmm0
    movdqa xmmword ptr [rdx + 16], xmm0
    add rdx, 32
    cmp rdx, rcx
    jb .Lxrt_memset_22
    jmp .Lxrt_memset_25
.Lxrt_memset_8:
    // 5..=7 bytes
    mov dword ptr [rax], ecx
    mov dword ptr [rax + r8 - 4], ecx
    ret
"#}

15
src/win64/strlen.rs Normal file
View File

@@ -0,0 +1,15 @@
// strlen(str = rcx) -> rax
//
// Returns the number of bytes before the first zero byte, found with
// `repne scasb`. rdi is a non-volatile register in the Windows x64 calling
// convention, so it is saved and restored around the scan.
core::arch::global_asm! {r#"
    .text
    .globl strlen
strlen:
    push rdi
    mov rdi, rcx
    // al = 0 is the byte searched for; rcx = -1 means "no length limit"
    xor eax, eax
    mov rcx, -1
    cld
    repne scasb
    // rcx was decremented once per scanned byte including the terminator:
    // rcx = -1 - (n + 1), hence n = -2 - rcx
    mov rax, -2
    sub rax, rcx
    pop rdi
    ret
"#}

16
tests/memcmp.rs Normal file
View File

@@ -0,0 +1,16 @@
/// Checks every outcome of `xrt::memcmp`: equal buffers, a greater and a lower
/// byte in the first buffer, a zero-length comparison and an equal prefix.
#[test]
pub fn test_memcmp() {
    let buffer1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let buffer2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let buffer3: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 6];
    let buffer4: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 8];
    // SAFETY (all calls): every pointer comes from an 8-byte array and at most
    // 8 bytes are compared.
    // == 0: identical contents
    assert_eq!(0, unsafe { xrt::memcmp(buffer1.as_ptr(), buffer2.as_ptr(), 8) });
    // > 0: the last byte of buffer1 (7) is greater than the last byte of buffer3 (6)
    assert_eq!(1, unsafe { xrt::memcmp(buffer1.as_ptr(), buffer3.as_ptr(), 8) });
    // < 0: the last byte of buffer1 (7) is lower than the last byte of buffer4 (8)
    assert_eq!(-1, unsafe { xrt::memcmp(buffer1.as_ptr(), buffer4.as_ptr(), 8) });
    // == 0: nothing is compared when the length is zero
    assert_eq!(0, unsafe { xrt::memcmp(buffer1.as_ptr(), buffer4.as_ptr(), 0) });
    // == 0: the first seven bytes of buffer1 and buffer4 are identical
    assert_eq!(0, unsafe { xrt::memcmp(buffer1.as_ptr(), buffer4.as_ptr(), 7) });
}

33
tests/memcpy.rs Normal file
View File

@@ -0,0 +1,33 @@
/// Copies pseudo-random data with `xrt::memcpy`, once as a single 4 KiB block
/// and once for every length from 0 to 200 so that each size class of the
/// implementation is exercised.
#[test]
pub fn test_memcpy() {
    // Fill the source with deterministic pseudo-random bytes.
    let mut source: Vec<u8> = Vec::with_capacity(0x1000);
    let mut seed = 0x696969u64;
    for _ in 0..0x200 {
        source.extend(seed.to_le_bytes());
        seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1);
    }

    // The destination is zero-initialised instead of using `set_len` on
    // uninitialised capacity, which would violate `Vec::set_len`'s contract.
    let mut target = vec![0u8; 0x1000];
    // SAFETY: both buffers are 0x1000 bytes long and do not overlap.
    unsafe { xrt::memcpy(target.as_mut_ptr(), source.as_ptr(), 0x1000) };
    assert_eq!(source, target);

    // Every small length; the bytes behind the copied range must stay untouched.
    const FILL: u8 = 0xAA;
    for len in 0..=200usize {
        let mut target = vec![FILL; 201];
        // SAFETY: `len` never exceeds either buffer and the buffers are disjoint.
        unsafe { xrt::memcpy(target.as_mut_ptr(), source.as_ptr(), len) };
        assert_eq!(&target[..len], &source[..len], "len = {}", len);
        assert!(target[len..].iter().all(|&b| b == FILL), "len = {}", len);
    }
}

23
tests/memmove.rs Normal file
View File

@@ -0,0 +1,23 @@
/// Exercises `xrt::memmove` with disjoint ranges and with overlapping ranges in
/// both directions.
///
/// Source and destination are derived from one `as_mut_ptr()` call per case, so
/// the destination pointer is not invalidated by a later reborrow of the array.
#[test]
pub fn test_memmove() {
    // Disjoint halves: copy the upper four bytes over the lower four.
    let mut buffer: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let expected: [u8; 8] = [4, 5, 6, 7, 4, 5, 6, 7];
    let base = buffer.as_mut_ptr();
    // SAFETY: [0, 4) and [4, 8) both lie inside the 8-byte array.
    unsafe { xrt::memmove(base, base.add(4), 4); }
    assert_eq!(buffer, expected);

    // Overlap with the destination above the source (needs a backward copy).
    let mut buffer: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let expected: [u8; 8] = [0, 0, 1, 2, 3, 4, 5, 6];
    let base = buffer.as_mut_ptr();
    // SAFETY: [1, 8) and [0, 7) both lie inside the 8-byte array.
    unsafe { xrt::memmove(base.add(1), base, 7); }
    assert_eq!(buffer, expected);

    // Overlap with the destination below the source (needs a forward copy).
    let mut buffer: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let expected: [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 7];
    let base = buffer.as_mut_ptr();
    // SAFETY: [0, 7) and [1, 8) both lie inside the 8-byte array.
    unsafe { xrt::memmove(base, base.add(1), 7); }
    assert_eq!(buffer, expected);
}

23
tests/memset.rs Normal file
View File

@@ -0,0 +1,23 @@
/// Fills buffers of many lengths with `xrt::memset` and checks that exactly the
/// requested bytes were written.
///
/// The lengths sit on both sides of every size class of the implementation.
/// Each buffer starts out as guard bytes that differ from the fill value, so an
/// unwritten or overwritten byte is always detected.
#[test]
pub fn test_memset() {
    const GUARD: u8 = 0xEE;
    let lengths = [
        0usize, 1, 2, 3, 4, 5, 7, 8, 10, 12, 16, 17, 31, 32, 33, 160, 161, 200, 500,
    ];
    for (index, &len) in lengths.iter().enumerate() {
        // 1, 2, 3, ... never equals GUARD.
        let value = index as u8 + 1;
        // One guard byte in front of and one behind the filled range.
        let mut buffer = vec![GUARD; len + 2];
        // SAFETY: `buffer[1..]` is `len + 1` bytes long.
        unsafe { xrt::memset(buffer[1..].as_mut_ptr(), value, len) };
        assert_eq!(buffer[0], GUARD, "len = {}", len);
        assert!(buffer[1..=len].iter().all(|&b| b == value), "len = {}", len);
        assert_eq!(buffer[len + 1], GUARD, "len = {}", len);
    }
}

11
tests/strlen.rs Normal file
View File

@@ -0,0 +1,11 @@
/// Checks `xrt::strlen` on a 1000-byte string at several start offsets and on
/// the empty string.
#[test]
pub fn test_strlen() {
    let mut buffer: Vec<u8> = "a".repeat(1000).into();
    buffer.push(0);
    // SAFETY (all calls): every pointer lies inside `buffer`, which ends with a
    // zero byte.
    assert_eq!(unsafe { xrt::strlen(buffer.as_ptr()) }, 1000);
    assert_eq!(unsafe { xrt::strlen(buffer[1..].as_ptr()) }, 999);
    assert_eq!(unsafe { xrt::strlen(buffer[8..].as_ptr()) }, 992);
    // Pointing straight at the terminator yields the empty string.
    assert_eq!(unsafe { xrt::strlen(buffer[1000..].as_ptr()) }, 0);
}