init
This commit is contained in:
130
src/win64/memset.rs
Normal file
130
src/win64/memset.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
// https://github.com/nadavrot/memset_benchmark/blob/main/src/memset/impl.c
|
||||
|
||||
// memset for the Microsoft x64 calling convention, specialised by size class.
//
//   void *memset(void *dest, int value, size_t count)
//
// In:    rcx = dest, dl = fill byte (the rest of rdx is ignored), r8 = count
// Out:   rax = dest
// Clobb: rcx, rdx, r8, r9, xmm0, flags. No stack usage, no calls.
//
// Size classes (count in bytes):
//   0          nothing is stored
//   1..=4      individual byte stores
//   5..=7      two overlapping dword stores (first 4 / last 4 bytes)
//   8..=16     two overlapping qword stores (first 8 / last 8 bytes)
//   17..=31    two overlapping 16-byte stores (first 16 / last 16 bytes)
//   32..=160   unaligned 32-byte stamps from the start, then one stamp
//              covering the last 32 bytes
//   161..      unaligned 32-byte head stamp, 32-byte-aligned stores in a
//              160-byte unrolled loop, up to four more aligned 32-byte
//              stamps, then one unaligned stamp covering the last 32 bytes
//
// Local labels use a `.Lmemset_` prefix. The `.LBB<fn>_<bb>` spelling is what
// the compiler itself generates for basic blocks, so reusing it in module-level
// asm risks a duplicate-symbol error when both land in one object file.
//
// Each instruction is its own template string (the macro joins them with
// newlines) so the annotations below are ordinary Rust comments.
core::arch::global_asm! {
    ".text",
    ".globl memset",
    "memset:",
    "    mov     rax, rcx",                         // return value = dest
    "    cmp     r8, 31",
    "    ja      .Lmemset_ge_32",
    "    cmp     r8, 4",
    "    ja      .Lmemset_5_to_31",

    // ---- count in 0..=4 -------------------------------------------------
    "    test    r8, r8",
    "    je      .Lmemset_done",                    // count == 0: store nothing
    "    mov     byte ptr [rax], dl",               // first byte
    "    mov     byte ptr [rax + r8 - 1], dl",      // last byte
    "    cmp     r8, 3",
    "    jb      .Lmemset_done",                    // count is 1 or 2: already covered
    "    mov     byte ptr [rax + 1], dl",           // count is 3 or 4: fill the middle
    "    mov     byte ptr [rax + 2], dl",
    ".Lmemset_done:",
    "    ret",

    // ---- count in 5..=31 ------------------------------------------------
    ".Lmemset_5_to_31:",
    "    cmp     r8, 16",
    "    ja      .Lmemset_17_to_31",
    "    movzx   edx, dl",
    "    movabs  rcx, 0x0101010101010101",
    "    imul    rcx, rdx",                         // rcx = fill byte replicated into all 8 bytes
    "    cmp     r8, 8",
    "    jb      .Lmemset_5_to_7",
    "    mov     qword ptr [rax], rcx",             // count in 8..=16: first 8 bytes
    "    mov     qword ptr [rax + r8 - 8], rcx",    // ... and last 8 bytes (may overlap)
    "    ret",

    ".Lmemset_5_to_7:",
    "    mov     dword ptr [rax], ecx",             // first 4 bytes
    "    mov     dword ptr [rax + r8 - 4], ecx",    // last 4 bytes (overlapping)
    "    ret",

    ".Lmemset_17_to_31:",
    "    movzx   ecx, dl",
    "    movd    xmm0, ecx",
    "    punpcklbw xmm0, xmm0",                     // byte  -> word
    "    pshuflw xmm0, xmm0, 0",                    // word  -> low 4 words
    "    pshufd  xmm0, xmm0, 0",                    // dword -> all 16 bytes of xmm0
    "    movdqu  xmmword ptr [rax + r8 - 16], xmm0", // last 16 bytes
    "    movdqu  xmmword ptr [rax], xmm0",          // first 16 bytes (overlapping)
    "    ret",

    // ---- count >= 32 ----------------------------------------------------
    ".Lmemset_ge_32:",
    "    movzx   ecx, dl",
    "    movd    xmm0, ecx",
    "    punpcklbw xmm0, xmm0",
    "    pshuflw xmm0, xmm0, 0",
    "    pshufd  xmm0, xmm0, 0",                    // xmm0 = 16 copies of the fill byte
    "    cmp     r8, 161",
    "    jb      .Lmemset_32_to_160",

    // count >= 161. Register roles from here on:
    //   rdx = p, the 32-byte-aligned store cursor
    //   r9  = end  = dest + count
    //   rcx = last = end - 32 (start of the final unaligned stamp)
    //   r8  = p + 160 while inside the unrolled loop
    "    movdqu  xmmword ptr [rax], xmm0",          // unaligned head: bytes 0..32
    "    movdqu  xmmword ptr [rax + 16], xmm0",
    "    mov     rdx, rax",
    "    and     rdx, -32",
    "    add     rdx, 32",                          // p = first 32-aligned address above dest (<= dest + 32)
    "    lea     r9, [rax + r8]",                   // must read r8 (count) before r8 is reused below
    "    lea     rcx, [r9 - 32]",
    "    lea     r8, [rdx + 160]",
    "    cmp     r8, r9",
    "    jae     .Lmemset_tail",                    // fewer than 160 bytes left past p: skip the loop

    // Invariant on entry to each iteration: r8 - 160 is 32-byte aligned and
    // r8 < end, so all 160 bytes stored here lie inside the buffer.
    ".Lmemset_unrolled:",
    "    movdqa  xmmword ptr [r8 - 160], xmm0",
    "    movdqa  xmmword ptr [r8 - 144], xmm0",
    "    movdqa  xmmword ptr [r8 - 128], xmm0",
    "    movdqa  xmmword ptr [r8 - 112], xmm0",
    "    movdqa  xmmword ptr [r8 - 96], xmm0",
    "    movdqa  xmmword ptr [r8 - 80], xmm0",
    "    movdqa  xmmword ptr [r8 - 64], xmm0",
    "    movdqa  xmmword ptr [r8 - 48], xmm0",
    "    movdqa  xmmword ptr [r8 - 32], xmm0",
    "    movdqa  xmmword ptr [r8 - 16], xmm0",
    "    add     r8, 160",
    "    cmp     r8, r9",
    "    jb      .Lmemset_unrolled",
    "    lea     rdx, [r8 - 160]",                  // p = first byte not yet written by the loop

    // At most 160 bytes remain past p; the final stamp covers the last 32, so
    // at most four aligned 32-byte stamps are needed, each guarded by p < last.
    ".Lmemset_tail:",
    "    cmp     rdx, rcx",
    "    jae     .Lmemset_last_32",
    "    movdqa  xmmword ptr [rdx], xmm0",
    "    movdqa  xmmword ptr [rdx + 16], xmm0",
    "    add     rdx, 32",
    "    cmp     rdx, rcx",
    "    jae     .Lmemset_last_32",
    "    movdqa  xmmword ptr [rdx], xmm0",
    "    movdqa  xmmword ptr [rdx + 16], xmm0",
    "    add     rdx, 32",
    "    cmp     rdx, rcx",
    "    jae     .Lmemset_last_32",
    "    movdqa  xmmword ptr [rdx], xmm0",
    "    movdqa  xmmword ptr [rdx + 16], xmm0",
    "    add     rdx, 32",
    "    cmp     rdx, rcx",
    "    jae     .Lmemset_last_32",
    "    movdqa  xmmword ptr [rdx], xmm0",
    "    movdqa  xmmword ptr [rdx + 16], xmm0",
    ".Lmemset_last_32:",
    "    movdqu  xmmword ptr [rcx], xmm0",          // unaligned stamp over [end - 32, end)
    "    movdqu  xmmword ptr [rcx + 16], xmm0",
    "    ret",

    // ---- count in 32..=160 ----------------------------------------------
    // rdx = cursor starting at dest, rcx = dest + count - 32.
    // The loop always stamps at least once, then continues while rdx < rcx.
    ".Lmemset_32_to_160:",
    "    lea     rcx, [rax + r8 - 32]",
    "    mov     rdx, rax",
    ".Lmemset_stamp_32:",
    "    movdqu  xmmword ptr [rdx], xmm0",
    "    movdqu  xmmword ptr [rdx + 16], xmm0",
    "    add     rdx, 32",
    "    cmp     rdx, rcx",
    "    jb      .Lmemset_stamp_32",
    "    movdqu  xmmword ptr [rcx], xmm0",          // final stamp over the last 32 bytes
    "    movdqu  xmmword ptr [rcx + 16], xmm0",
    "    ret"
}
|
||||
Reference in New Issue
Block a user