.intel_syntax noprefix

/*
 * Arithmetic helpers for 512-bit unsigned integers stored as eight 64-bit
 * limbs, limb 0 at offset 0.  The carry chains below run from offset 0
 * upward, so limb 0 is the least significant one.
 *
 * Arguments are taken from rdi, rsi, rdx in that order, which matches the
 * System V AMD64 integer argument registers.  The C prototypes quoted in
 * the per-routine headers are inferred from register usage -- confirm them
 * against the C header that declares these symbols.
 *
 * Every routine is a leaf that only touches rax, rcx, rsi, rdi, r10, r11
 * and the flags; no stack memory is used.
 */

        .section .rodata

/* The constant 1 as an eight-limb value. */
        .global u512_1
u512_1:
        .quad   1, 0, 0, 0, 0, 0, 0, 0


        .section .text

/*
 * u512_set -- presumably: void u512_set(u512 *x, uint64_t y)
 *   In:    rdi = destination (8 limbs), rsi = 64-bit value
 *   Does:  limb 0 = rsi, limbs 1..7 = 0
 *   Clobb: rax (left at 0), rcx, rdi, DF (cleared), flags
 */
        .global u512_set
u512_set:
        cld                                 # string stores must walk upward
        mov     rax, rsi
        stosq                               # limb 0 = value, rdi += 8
        xor     eax, eax                    # rax = 0 (32-bit write zero-extends)
        mov     ecx, 7                      # seven limbs remain
        rep stosq                           # limbs 1..7 = 0
        ret

/*
 * u512_bit -- presumably: uint64_t u512_bit(const u512 *x, uint64_t k)
 *   In:    rdi = source limbs, rsi = bit index
 *   Out:   rax = bit (k mod 64) of the qword at [rdi + 8*(k / 64)], as 0 or 1
 *   Clobb: rcx, rsi, flags
 *   Note:  the index is not range-checked; k >= 512 reads past the 8 limbs.
 */
        .global u512_bit
u512_bit:
        mov     ecx, esi                    # keep the low index bits for the shift
        shr     rsi, 6                      # rsi = limb number
        mov     rax, [rdi + rsi*8]
        and     ecx, 63                     # rcx = bit position inside the limb
        shr     rax, cl
        and     eax, 1                      # isolate the bit; upper rax is zeroed
        ret

/*
 * u512_add3 -- presumably: uint64_t u512_add3(u512 *x, const u512 *y, const u512 *z)
 *   In:    rdi = destination, rsi = first addend, rdx = second addend
 *   Does:  [rdi] = [rsi] + [rdx] over eight limbs, truncated to 512 bits
 *   Out:   rax = carry out of the top limb (0 or 1)
 *   Clobb: flags
 *   Each limb is loaded from both inputs before it is stored.
 */
        .global u512_add3
u512_add3:
        mov     rax, [rsi]                  # limb 0 starts the carry chain
        add     rax, [rdx]
        mov     [rdi], rax
        .irp    limb, 1, 2, 3, 4, 5, 6, 7
        mov     rax, [rsi + 8*\limb]        # mov leaves CF intact between adc
        adc     rax, [rdx + 8*\limb]
        mov     [rdi + 8*\limb], rax
        .endr
        setc    al
        movzx   eax, al                     # rax = final carry
        ret

/*
 * u512_sub3 -- presumably: uint64_t u512_sub3(u512 *x, const u512 *y, const u512 *z)
 *   In:    rdi = destination, rsi = minuend, rdx = subtrahend
 *   Does:  [rdi] = [rsi] - [rdx] over eight limbs, truncated to 512 bits
 *   Out:   rax = borrow out of the top limb (0 or 1)
 *   Clobb: flags
 */
        .global u512_sub3
u512_sub3:
        mov     rax, [rsi]                  # limb 0 starts the borrow chain
        sub     rax, [rdx]
        mov     [rdi], rax
        .irp    limb, 1, 2, 3, 4, 5, 6, 7
        mov     rax, [rsi + 8*\limb]        # mov leaves CF intact between sbb
        sbb     rax, [rdx + 8*\limb]
        mov     [rdi + 8*\limb], rax
        .endr
        setc    al
        movzx   eax, al                     # rax = final borrow
        ret

/*
 * u512_mul3_64 -- presumably: void u512_mul3_64(u512 *x, const u512 *y, uint64_t z)
 *   In:    rdi = destination, rsi = 8-limb multiplicand,
 *          rdx = 64-bit multiplier (the implicit mulx source operand)
 *   Does:  [rdi] = low 512 bits of [rsi] * rdx
 *   Clobb: rax, r10, r11, flags
 *   Needs: the mulx (BMI2) and adcx (ADX) instructions.
 *
 * Each mulx yields high:low of one limb product.  The low half is added to
 * the previous high half plus the running carry.  mulx and mov do not write
 * CF, so the carry survives from one step to the next.  The high half of
 * the last product and the last carry are dropped.
 */
        .global u512_mul3_64
u512_mul3_64:
        mulx    r10, rax, [rsi]             # r10:rax = y[0] * z
        mov     [rdi], rax

        mulx    r11, rax, [rsi + 8]         # r11:rax = y[1] * z
        add     rax, r10                    # plain add: starts the carry chain
        mov     [rdi + 8], rax

        mulx    r10, rax, [rsi + 16]        # r10:rax = y[2] * z
        adcx    rax, r11
        mov     [rdi + 16], rax

        mulx    r11, rax, [rsi + 24]        # r11:rax = y[3] * z
        adcx    rax, r10
        mov     [rdi + 24], rax

        mulx    r10, rax, [rsi + 32]        # r10:rax = y[4] * z
        adcx    rax, r11
        mov     [rdi + 32], rax

        mulx    r11, rax, [rsi + 40]        # r11:rax = y[5] * z
        adcx    rax, r10
        mov     [rdi + 40], rax

        mulx    r10, rax, [rsi + 48]        # r10:rax = y[6] * z
        adcx    rax, r11
        mov     [rdi + 48], rax

        mulx    r11, rax, [rsi + 56]        # r11:rax = y[7] * z, r11 is unused
        adcx    rax, r10
        mov     [rdi + 56], rax
        ret