init
This commit is contained in:
41
src/lib.rs
Normal file
41
src/lib.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
#![no_std]
|
||||
|
||||
// Assembly implementations of the C memory/string routines declared below.
// Each submodule contributes one `global_asm!` block; the whole set is only
// compiled when targeting 64-bit x86 Windows.
#[cfg(all(target_os = "windows", target_arch = "x86_64"))]
mod win64 {
    mod memcmp;
    mod memcpy;
    mod memmove;
    mod memset;
    mod strlen;
}
|
||||
|
||||
// Foreign declarations for the routines exported by this crate.
//
// The pointer-returning C functions are declared here as returning `usize`;
// the win64 assembly in this crate places `dest` in rax for memcpy, memmove
// and memset, so that integer is the destination address.
// NOTE(review): declaring these as `*mut u8` would match the C prototypes more
// closely, but would change the public signatures - confirm with callers.
#[allow(unused)]
extern "system" {
    /// Compares the first `len` bytes of `ptr1` and `ptr2` as unsigned bytes.
    /// Returns 0 when they are equal, a negative value when the first
    /// differing byte of `ptr1` is smaller, and a positive value otherwise.
    pub fn memcmp(ptr1: *const u8, ptr2: *const u8, len: usize) -> i32;

    /// Copies `len` bytes from `src` to `dest`.
    pub fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> usize;

    /// Copies `len` bytes from `src` to `dest`.
    pub fn memmove(dest: *mut u8, src: *const u8, len: usize) -> usize;

    /// Stores `value` into each of the first `len` bytes at `dest`.
    pub fn memset(dest: *mut u8, value: u8, len: usize) -> usize;

    /// Returns the number of bytes preceding the first zero byte at `str`.
    pub fn strlen(str: *const u8) -> usize;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
20
src/win64/memcmp.rs
Normal file
20
src/win64/memcmp.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
// int memcmp(const void *ptr1, const void *ptr2, size_t len)
//
// ABI:   Microsoft x64, leaf function (no stack use, volatile registers only)
// In:    rcx = ptr1, rdx = ptr2, r8 = len
// Out:   eax = 0 when the first `len` bytes are equal (or len == 0),
//        -1 when the first differing byte of ptr1 is the smaller one
//        (unsigned compare), 1 otherwise
// Clobb: r9, r10, r11, flags
//
// Local labels use a `.Lmemcmp_` prefix instead of compiler-style `.LBB0_N`
// names so they cannot collide with labels from other assembly that ends up
// in the same object file.
core::arch::global_asm! { r#"
    .text
    .globl memcmp
memcmp:
    xor     eax, eax                        # result when nothing differs
    xor     r9d, r9d                        # r9 = byte index
.Lmemcmp_next:
    cmp     r8, r9
    je      .Lmemcmp_done                   # index reached len: equal
    movzx   r10d, byte ptr [rcx + r9]
    movzx   r11d, byte ptr [rdx + r9]
    inc     r9
    cmp     r10b, r11b
    je      .Lmemcmp_next
    # Flags still hold the compare above: CF = 1 when ptr1 byte < ptr2 byte.
    sbb     eax, eax                        # eax = -CF  (0 or -1)
    or      eax, 1                          # 0 -> 1, -1 stays -1
.Lmemcmp_done:
    ret
"# }
|
||||
67
src/win64/memcpy.rs
Normal file
67
src/win64/memcpy.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
// https://github.com/nadavrot/memset_benchmark/blob/main/src/memcpy/impl.c
|
||||
|
||||
core::arch::global_asm!{r#"
|
||||
.text
|
||||
.globl memcpy
|
||||
memcpy:
|
||||
mov rax, rcx
|
||||
cmp r8, 4
|
||||
ja .LBB0_4
|
||||
test r8, r8
|
||||
je .LBB0_13
|
||||
movzx ecx, byte ptr [rdx]
|
||||
mov byte ptr [rax], cl
|
||||
movzx ecx, byte ptr [rdx + r8 - 1]
|
||||
mov byte ptr [rax + r8 - 1], cl
|
||||
cmp r8, 3
|
||||
jb .LBB0_13
|
||||
movzx ecx, byte ptr [rdx + 1]
|
||||
mov byte ptr [rax + 1], cl
|
||||
movzx ecx, byte ptr [rdx + 2]
|
||||
mov byte ptr [rax + 2], cl
|
||||
ret
|
||||
.LBB0_4:
|
||||
cmp r8, 16
|
||||
ja .LBB0_8
|
||||
cmp r8, 8
|
||||
jb .LBB0_7
|
||||
mov rcx, qword ptr [rdx]
|
||||
mov qword ptr [rax], rcx
|
||||
mov rcx, qword ptr [rdx + r8 - 8]
|
||||
mov qword ptr [rax + r8 - 8], rcx
|
||||
ret
|
||||
.LBB0_8:
|
||||
cmp r8, 32
|
||||
ja .LBB0_10
|
||||
movups xmm0, xmmword ptr [rdx]
|
||||
movups xmmword ptr [rax], xmm0
|
||||
movups xmm0, xmmword ptr [rdx + r8 - 16]
|
||||
movups xmmword ptr [rax + r8 - 16], xmm0
|
||||
ret
|
||||
.LBB0_7:
|
||||
mov ecx, dword ptr [rdx]
|
||||
mov dword ptr [rax], ecx
|
||||
mov ecx, dword ptr [rdx + r8 - 4]
|
||||
mov dword ptr [rax + r8 - 4], ecx
|
||||
ret
|
||||
.LBB0_10:
|
||||
lea rcx, [rax + r8]
|
||||
add rcx, -32
|
||||
mov r9, rdx
|
||||
mov r10, rax
|
||||
.LBB0_11:
|
||||
movups xmm0, xmmword ptr [r9]
|
||||
movups xmm1, xmmword ptr [r9 + 16]
|
||||
movups xmmword ptr [r10 + 16], xmm1
|
||||
movups xmmword ptr [r10], xmm0
|
||||
add r10, 32
|
||||
add r9, 32
|
||||
cmp r10, rcx
|
||||
jb .LBB0_11
|
||||
movups xmm0, xmmword ptr [rdx + r8 - 32]
|
||||
movups xmm1, xmmword ptr [rdx + r8 - 16]
|
||||
movups xmmword ptr [rcx + 16], xmm1
|
||||
movups xmmword ptr [rcx], xmm0
|
||||
.LBB0_13:
|
||||
ret
|
||||
"#}
|
||||
159
src/win64/memmove.rs
Normal file
159
src/win64/memmove.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
// void *memmove(void *dest, const void *src, size_t len)
//
// ABI:   Microsoft x64, leaf function (no stack use, volatile registers only;
//        the disjoint case leaves via a tail jump to `memcpy`)
// In:    rcx = dest, rdx = src, r8 = len
// Out:   rax = dest
// Clobb: r8, r9, r10, xmm0, flags (plus whatever memcpy clobbers)
//
// All length and distance arithmetic is done on full 64-bit unsigned values.
// Three cases are distinguished:
//   * src < dest < src + len : copy from the end towards the start
//   * dest < src < dest + len: copy from the start towards the end
//   * buffers do not overlap : hand over to memcpy
// In both overlapping loops each 16-byte chunk is loaded completely before it
// is stored, and the copy direction guarantees a store never lands on bytes
// that a later iteration still has to read.
//
// Local labels use a `.Lmemmove_` prefix instead of compiler-style `.LBB0_N`
// names so they cannot collide with labels from other assembly that ends up
// in the same object file.
core::arch::global_asm! { r#"
    .text
    .globl memmove
memmove:
    mov     rax, rcx                        # return value = dest
    mov     r9, rcx
    sub     r9, rdx                         # r9 = dest - src (mod 2^64)
    je      .Lmemmove_done                  # same address: nothing to do
    test    r8, r8
    je      .Lmemmove_done                  # len == 0
    cmp     r9, r8
    jb      .Lmemmove_backward              # src < dest < src + len
    mov     r10, rdx
    sub     r10, rcx                        # r10 = src - dest (mod 2^64)
    cmp     r10, r8
    jb      .Lmemmove_forward               # dest < src < dest + len
    jmp     memcpy                          # disjoint: rcx, rdx, r8 are untouched

.Lmemmove_forward:
    xor     r9d, r9d                        # r9 = bytes copied so far
    cmp     r8, 16
    jb      .Lmemmove_fwd_bytes
    mov     r10, r8
    and     r10, -16                        # r10 = len rounded down to 16, nonzero here
.Lmemmove_fwd_vec:
    movups  xmm0, xmmword ptr [rdx + r9]
    movups  xmmword ptr [rcx + r9], xmm0
    add     r9, 16
    cmp     r9, r10
    jne     .Lmemmove_fwd_vec
    cmp     r9, r8
    je      .Lmemmove_done
.Lmemmove_fwd_bytes:                        # invariant: r9 < r8
    movzx   r10d, byte ptr [rdx + r9]
    mov     byte ptr [rcx + r9], r10b
    inc     r9
    cmp     r9, r8
    jne     .Lmemmove_fwd_bytes
    ret

.Lmemmove_backward:                         # r8 = bytes still to copy, taken from the top
    cmp     r8, 16
    jb      .Lmemmove_bwd_bytes
.Lmemmove_bwd_vec:
    movups  xmm0, xmmword ptr [rdx + r8 - 16]
    movups  xmmword ptr [rcx + r8 - 16], xmm0
    sub     r8, 16
    cmp     r8, 16
    jae     .Lmemmove_bwd_vec
    test    r8, r8
    je      .Lmemmove_done
.Lmemmove_bwd_bytes:                        # invariant: r8 > 0
    movzx   r10d, byte ptr [rdx + r8 - 1]
    mov     byte ptr [rcx + r8 - 1], r10b
    dec     r8
    jne     .Lmemmove_bwd_bytes
.Lmemmove_done:
    ret
"# }
|
||||
130
src/win64/memset.rs
Normal file
130
src/win64/memset.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
// https://github.com/nadavrot/memset_benchmark/blob/main/src/memset/impl.c
|
||||
|
||||
core::arch::global_asm!{r#"
|
||||
.text
|
||||
.globl memset
|
||||
memset:
|
||||
mov rax, rcx
|
||||
cmp r8, 31
|
||||
ja .LBB0_10
|
||||
cmp r8, 4
|
||||
ja .LBB0_5
|
||||
test r8, r8
|
||||
je .LBB0_26
|
||||
mov byte ptr [rax], dl
|
||||
mov byte ptr [r8 + rax - 1], dl
|
||||
cmp r8, 3
|
||||
jb .LBB0_26
|
||||
mov byte ptr [rax + 1], dl
|
||||
mov byte ptr [rax + 2], dl
|
||||
ret
|
||||
.LBB0_10:
|
||||
movzx ecx, dl
|
||||
movd xmm0, ecx
|
||||
punpcklbw xmm0, xmm0
|
||||
pshuflw xmm0, xmm0, 0
|
||||
pshufd xmm0, xmm0, 0
|
||||
cmp r8, 161
|
||||
jb .LBB0_23
|
||||
movdqu xmmword ptr [rax], xmm0
|
||||
movdqu xmmword ptr [rax + 16], xmm0
|
||||
mov rdx, rax
|
||||
and rdx, -32
|
||||
lea r9, [rax + r8]
|
||||
lea rcx, [rax + r8]
|
||||
add rcx, -32
|
||||
lea r8, [rdx + 192]
|
||||
cmp r8, r9
|
||||
jae .LBB0_12
|
||||
.LBB0_13:
|
||||
movdqa xmmword ptr [r8 - 160], xmm0
|
||||
movdqa xmmword ptr [r8 - 144], xmm0
|
||||
movdqa xmmword ptr [r8 - 128], xmm0
|
||||
movdqa xmmword ptr [r8 - 112], xmm0
|
||||
movdqa xmmword ptr [r8 - 96], xmm0
|
||||
movdqa xmmword ptr [r8 - 80], xmm0
|
||||
movdqa xmmword ptr [r8 - 64], xmm0
|
||||
movdqa xmmword ptr [r8 - 48], xmm0
|
||||
movdqa xmmword ptr [r8 - 32], xmm0
|
||||
movdqa xmmword ptr [r8 - 16], xmm0
|
||||
add r8, 160
|
||||
cmp r8, r9
|
||||
jb .LBB0_13
|
||||
add r8, -160
|
||||
mov rdx, r8
|
||||
cmp rdx, rcx
|
||||
jb .LBB0_16
|
||||
jmp .LBB0_17
|
||||
.LBB0_5:
|
||||
cmp r8, 16
|
||||
ja .LBB0_9
|
||||
movzx edx, dl
|
||||
movabs rcx, 72340172838076673
|
||||
imul rcx, rdx
|
||||
cmp r8, 8
|
||||
jb .LBB0_8
|
||||
mov qword ptr [rax], rcx
|
||||
mov qword ptr [rax + r8 - 8], rcx
|
||||
ret
|
||||
.LBB0_23:
|
||||
lea rcx, [rax + r8]
|
||||
add rcx, -32
|
||||
mov rdx, rax
|
||||
.LBB0_24:
|
||||
movdqu xmmword ptr [rdx], xmm0
|
||||
movdqu xmmword ptr [rdx + 16], xmm0
|
||||
add rdx, 32
|
||||
cmp rdx, rcx
|
||||
jb .LBB0_24
|
||||
jmp .LBB0_25
|
||||
.LBB0_12:
|
||||
add rdx, 32
|
||||
cmp rdx, rcx
|
||||
jae .LBB0_17
|
||||
.LBB0_16:
|
||||
movdqa xmmword ptr [rdx], xmm0
|
||||
movdqa xmmword ptr [rdx + 16], xmm0
|
||||
add rdx, 32
|
||||
.LBB0_17:
|
||||
cmp rdx, rcx
|
||||
jb .LBB0_18
|
||||
cmp rdx, rcx
|
||||
jb .LBB0_20
|
||||
.LBB0_21:
|
||||
cmp rdx, rcx
|
||||
jae .LBB0_25
|
||||
.LBB0_22:
|
||||
movdqa xmmword ptr [rdx], xmm0
|
||||
movdqa xmmword ptr [rdx + 16], xmm0
|
||||
.LBB0_25:
|
||||
movdqu xmmword ptr [rcx], xmm0
|
||||
movdqu xmmword ptr [rcx + 16], xmm0
|
||||
.LBB0_26:
|
||||
ret
|
||||
.LBB0_9:
|
||||
movzx ecx, dl
|
||||
movd xmm0, ecx
|
||||
punpcklbw xmm0, xmm0
|
||||
pshuflw xmm0, xmm0, 0
|
||||
pshufd xmm0, xmm0, 0
|
||||
movdqu xmmword ptr [rax + r8 - 16], xmm0
|
||||
movdqu xmmword ptr [rax], xmm0
|
||||
ret
|
||||
.LBB0_18:
|
||||
movdqa xmmword ptr [rdx], xmm0
|
||||
movdqa xmmword ptr [rdx + 16], xmm0
|
||||
add rdx, 32
|
||||
cmp rdx, rcx
|
||||
jae .LBB0_21
|
||||
.LBB0_20:
|
||||
movdqa xmmword ptr [rdx], xmm0
|
||||
movdqa xmmword ptr [rdx + 16], xmm0
|
||||
add rdx, 32
|
||||
cmp rdx, rcx
|
||||
jb .LBB0_22
|
||||
jmp .LBB0_25
|
||||
.LBB0_8:
|
||||
mov dword ptr [rax], ecx
|
||||
mov dword ptr [rax + r8 - 4], ecx
|
||||
ret
|
||||
"#}
|
||||
15
src/win64/strlen.rs
Normal file
15
src/win64/strlen.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
core::arch::global_asm!{r#"
|
||||
.globl strlen
|
||||
strlen:
|
||||
push rdi
|
||||
mov rdx, rcx
|
||||
mov rdi, rcx
|
||||
xor rax, rax
|
||||
mov rcx, -1
|
||||
cld
|
||||
repne scasb
|
||||
mov rax, -2
|
||||
sub rax, rcx
|
||||
pop rdi
|
||||
ret
|
||||
"#}
|
||||
Reference in New Issue
Block a user