|
- ///--------------------------------------------------------------------------
- /// General definitions.
-
- // Preprocessor hacks.
- //
- // STRINGY(x) turns the macro-expansion of x into a string literal, and
- // GLUE(x, y) pastes the macro-expansions of x and y into a single token.
- // The underscore-prefixed helpers apply `#' and `##' directly; the outer
- // macros exist so that the arguments are macro-expanded first. _EMPTY
- // expands to nothing.
- #define STRINGY(x) _STRINGY(x)
- #define _STRINGY(x) #x
- #define GLUE(x, y) _GLUE(x, y)
- #define _GLUE(x, y) x##y
- #define _EMPTY
-
- // Some useful variables.
- //
- // .L$_subsec is the number of the `.text' subsection currently used for
- // code (see TEXT and the literal-pool macros).
- .L$_subsec = 0
-
- // Literal pools done the hard way.
- //
- // _LIT switches to `.text' subsection .L$_subsec + 1 and _ENDLIT switches
- // back to subsection .L$_subsec. _LTORG advances .L$_subsec by two and
- // selects the new subsection.
- #define _LIT .text .L$_subsec + 1
- #define _ENDLIT .text .L$_subsec
- #define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
-
- // ELF section types.
- //
- // _SECTTY(ty) spells section type ty for a `.section' directive: `%ty' when
- // CPUFAM_ARMEL is set, and `@ty' otherwise. It is only defined on ELF.
- #if __ELF__
- # if CPUFAM_ARMEL
- # define _SECTTY(ty) %ty
- # else
- # define _SECTTY(ty) @ty
- # endif
- #endif
-
- // Section selection.
- //
- // TEXT selects the current code subsection. RODATA selects a read-only data
- // section: `.rdata' when ABI_WIN is set, `.rodata' on ELF, and otherwise it
- // simply falls back to TEXT. DATA selects `.data'.
- #define TEXT .text .L$_subsec
- #if ABI_WIN
- # define RODATA .section .rdata, "dr"
- #elif __ELF__
- # define RODATA .section .rodata, "a", _SECTTY(progbits)
- #else
- # define RODATA TEXT
- #endif
- #define DATA .data
-
- // Announcing an internal function.
- //
- // INTFUNC(name) applies TYPE_FUNC to name, defines a temporary `ENDFUNC'
- // assembler macro which expands to _ENDFUNC(name), clears the
- // .L$_prologue_p and .L$_frameptr_p flags, and then emits the label name
- // between the FUNC_PREHOOK and FUNC_POSTHOOK hooks.
- #define INTFUNC(name) \
- TYPE_FUNC(name); \
- .macro ENDFUNC; _ENDFUNC(name); .endm; \
- .L$_prologue_p = 0; .L$_frameptr_p = 0; \
- FUNC_PREHOOK(name); \
- name: \
- FUNC_POSTHOOK(name)
-
- // Announcing an external function.
- //
- // FUNC(name) exports the symbol F(name) with `.globl' and then announces it
- // through INTFUNC.
- #define FUNC(name) \
- .globl F(name); \
- INTFUNC(F(name))
-
- // Marking the end of a function.
- //
- // _ENDFUNC(name) is what the per-function `ENDFUNC' macro expands to. It
- // reports an error unless .L$_prologue_p has been set to -1 (the test is on
- // its bitwise complement), purges `dropfp' if .L$_frameptr_p is still
- // nonzero, purges `ENDFUNC' itself, and finishes with SIZE_OBJ,
- // ENDFUNC_HOOK and _LTORG.
- #define _ENDFUNC(name) \
- .if ~ .L$_prologue_p; .error "Missing `endprologue'"; .endif; \
- .if .L$_frameptr_p; .purgem dropfp; .endif; \
- .purgem ENDFUNC; \
- SIZE_OBJ(name); \
- ENDFUNC_HOOK(name); \
- _LTORG
-
- // Make a helper function, if necessary.
- //
- // The code between AUXFN(name) and ENDAUXFN is assembled only if the symbol
- // .L$_auxfn_def.name is not yet defined. It is placed in `.text' subsection
- // 7128, after FUNC_PREHOOK, under the label name. ENDAUXFN then switches
- // back to subsection .L$_subsec and defines .L$_auxfn_def.name, so that
- // later requests for the same helper assemble nothing.
- #define AUXFN(name) \
- .ifndef .L$_auxfn_def.name; \
- .text 7128; \
- .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
- FUNC_PREHOOK(name); \
- name:
- #define _ENDAUXFN_TAIL(name) \
- .purgem _ENDAUXFN; \
- .text .L$_subsec; \
- .L$_auxfn_def.name = 1
- #define ENDAUXFN _ENDAUXFN; .endif
-
- ///--------------------------------------------------------------------------
- /// ELF-specific hacking.
-
- #if __ELF__
-
- // Position-independent code is wanted if the compiler reports that it is
- // building PIC or PIE.
- #if __PIC__ || __PIE__
- # define WANT_PIC 1
- #endif
-
- // Give name the symbol type STT_FUNC.
- #define TYPE_FUNC(name) .type name, STT_FUNC
-
- // Record the size of name as the distance from name to the current location.
- #define SIZE_OBJ(name) .size name, . - name
-
- #endif
-
- ///--------------------------------------------------------------------------
- /// Windows-specific hacking.
-
- #if ABI_WIN
-
- // With ABI_WIN on x86, F(name) is name with a leading underscore.
- #if CPUFAM_X86
- # define F(name) _##name
- #endif
-
- #endif
-
- ///--------------------------------------------------------------------------
- /// x86- and amd64-specific hacking.
- ///
- /// It's (slightly) easier to deal with both of these in one go.
-
- #if CPUFAM_X86 || CPUFAM_AMD64
-
- // Word size.
- //
- // WORDSZ is the number of bytes in a machine word: 4 on x86 and 8 on amd64.
- // `pushreg' and `popreg' use it as the amount by which the stack moves.
- #if CPUFAM_X86
- # define WORDSZ 4
- #endif
- #if CPUFAM_AMD64
- # define WORDSZ 8
- #endif
-
- // Set the function hooks.
- //
- // Every function label is preceded by a 16-byte alignment directive.
- #define FUNC_PREHOOK(_) .balign 16
-
- // On Windows, arrange to install stack-unwinding data.
- #if CPUFAM_AMD64 && ABI_WIN
- # define FUNC_POSTHOOK(name) .seh_proc name
- # define ENDFUNC_HOOK(_) .seh_endproc
- // Procedures are expected to invoke `.seh_setframe' if necessary, and
- // `.seh_pushreg' and friends, and `.seh_endprologue'.
- #endif
-
- // On ELF, bracket each function with CFI start/end directives instead.
- #if __ELF__
- # define FUNC_POSTHOOK(_) .cfi_startproc
- # define ENDFUNC_HOOK(_) .cfi_endproc
- #endif
-
- // Don't use the wretched AT&T syntax. It's festooned with pointless
- // punctuation, and all of the data movement is backwards. Ugh!
- //
- // All x86 code and macros from here on are written in Intel syntax, with
- // register names left unprefixed.
- .intel_syntax noprefix
-
- // Call external subroutine at ADDR, possibly via PLT.
- //
- // The `@PLT' form is used exactly when WANT_PIC is set.
- .macro callext addr
- #if WANT_PIC
- call \addr@PLT
- #else
- call \addr
- #endif
- .endm
-
- // Do I need to arrange a spare GOT register?
- //
- // NEED_GOT is set only for PIC on x86. GOTREG is the default register for
- // the `got' argument of `ldgot', `leaext' and INTADDR.
- #if WANT_PIC && CPUFAM_X86
- # define NEED_GOT 1
- #endif
- #define GOTREG ebx // Not needed in AMD64 so don't care.
-
- // Maybe load GOT address into GOT.
- //
- // This assembles to nothing unless building PIC on x86. Otherwise a helper
- // `_ldgot.GOT' is created the first time it is needed (see AUXFN); it copies
- // the word at the top of the stack -- on entry, its own return address --
- // into GOT and returns. The macro then calls the helper and adds
- // `offset _GLOBAL_OFFSET_TABLE_' to the result.
- .macro ldgot got=GOTREG
- #if WANT_PIC && CPUFAM_X86
- AUXFN(_ldgot.\got)
- mov \got, [esp]
- ret
- ENDAUXFN
- call _ldgot.\got
- add \got, offset _GLOBAL_OFFSET_TABLE_
- #endif
- .endm
-
- // Load address of external symbol ADDR into REG, maybe using GOT.
- //
- // When building PIC, the address is read out of ADDR's GOT entry: relative
- // to the GOT register on x86, or rip-relative on amd64. Otherwise the
- // address is formed directly, as an immediate on x86 or with a rip-relative
- // `lea' on amd64.
- .macro leaext reg, addr, got=GOTREG
- #if CPUFAM_X86
- # if WANT_PIC
- mov \reg, [\got + \addr@GOT]
- # else
- mov \reg, offset \addr
- # endif
- #endif
- #if CPUFAM_AMD64
- # if WANT_PIC
- mov \reg, \addr@GOTPCREL[rip]
- # else
- lea \reg, \addr[rip]
- # endif
- #endif
- .endm
-
- // Address expression (possibly using a base register, and a displacement)
- // referring to ADDR, which is within our module, maybe using GOT.
- //
- // Usage is INTADDR(addr) or INTADDR(addr, got). The trailing `GOTREG, dummy'
- // arguments supply the default for got when only addr is given and keep the
- // variadic tail of INTADDR__0 non-empty. The result is `addr + rip' on
- // amd64, `got + addr@GOTOFF' for PIC on x86, and plain `addr' otherwise.
- #define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
- #define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
- #if CPUFAM_AMD64
- # define INTADDR__1(addr, got) addr + rip
- #elif WANT_PIC
- # define INTADDR__1(addr, got) got + addr@GOTOFF
- #else
- # define INTADDR__1(addr, got) addr
- #endif
-
- // Permutations for SIMD instructions. SHUF(A, B, C, D) is an immediate,
- // suitable for use in `pshufd' or `shufps', which copies element A
- // (0 <= A < 4) of the source to element 0 of the destination, element B to
- // element 1, element C to element 2, and element D to element 3.
- //
- // The value packs the four selectors as two-bit fields, A in the least
- // significant position.
- #define SHUF(a, b, c, d) ((a) + 4*(b) + 16*(c) + 64*(d))
-
- // Map register names to their individual pieces.
-
- // Apply decoration decor to (internal) register name reg of type ty.
- //
- // See `R_...' for internal register names. Decorations are as follows.
- //
- // b low byte (e.g., `al', `r8b')
- // h high byte (e.g., `ah')
- // w word (e.g., `ax', `r8w')
- // d doubleword (e.g., `eax', `r8d')
- // q quadword (e.g., `rax', `r8')
- // r whole register (doubleword on x86, quadword on amd64)
- //
- // And types are as follows.
- //
- // abcd the four traditional registers `a', `b', `c', `d'
- // xp the four pointer registers `si', `di', `bp', `sp'
- // ip the instruction pointer `ip'
- // rn the AMD64 numbered registers `r8'--`r15'
- // mem a memory operand (reg is the address expression)
- // imm an immediate operand (reg is the value)
- //
- // The work is done by token-pasting: the result is whatever the macro
- // `_DECOR_ty_decor' makes of reg.
- #define _DECOR(ty, decor, reg) _DECOR_##ty##_##decor(reg)
-
- // Internal macros: _DECOR_ty_decor(reg) applies decoration decor to
- // (internal) register name reg of type ty.
-
- // Traditional registers: paste the letter into `?l', `?h', `?x', `e?x',
- // `r?x'.
- #define _DECOR_abcd_b(reg) reg##l
- #define _DECOR_abcd_h(reg) reg##h
- #define _DECOR_abcd_w(reg) reg##x
- #define _DECOR_abcd_d(reg) e##reg##x
- #if CPUFAM_AMD64
- # define _DECOR_abcd_q(reg) r##reg##x
- #endif
-
- // Pointer registers: the byte and quadword forms exist only on amd64.
- #define _DECOR_xp_w(reg) reg
- #define _DECOR_xp_d(reg) e##reg
- #if CPUFAM_AMD64
- # define _DECOR_xp_b(reg) reg##l
- # define _DECOR_xp_q(reg) r##reg
- #endif
-
- // Instruction pointer.
- #define _DECOR_ip_w(reg) reg
- #define _DECOR_ip_d(reg) e##reg
- #if CPUFAM_AMD64
- # define _DECOR_ip_q(reg) r##reg
- #endif
-
- // Numbered registers (amd64 only): the size is a suffix letter.
- #if CPUFAM_AMD64
- # define _DECOR_rn_b(reg) reg##b
- # define _DECOR_rn_w(reg) reg##w
- # define _DECOR_rn_d(reg) reg##d
- # define _DECOR_rn_q(reg) reg
- # define _DECOR_rn_r(reg) reg
- #endif
-
- // Memory operands: prefix the address with a `... ptr' size override.
- #define _DECOR_mem_b(addr) byte ptr addr
- #define _DECOR_mem_w(addr) word ptr addr
- #define _DECOR_mem_d(addr) dword ptr addr
- #if CPUFAM_AMD64
- # define _DECOR_mem_q(addr) qword ptr addr
- #endif
-
- // Immediate operands: prefix the value with a bare size keyword.
- #define _DECOR_imm_b(imm) byte imm
- #define _DECOR_imm_w(imm) word imm
- #define _DECOR_imm_d(imm) dword imm
- #if CPUFAM_AMD64
- # define _DECOR_imm_q(imm) qword imm
- #endif
-
- // The `r' (whole register) decoration is the doubleword form on x86 and
- // the quadword form on amd64.
- #if CPUFAM_X86
- # define _DECOR_abcd_r(reg) e##reg##x
- # define _DECOR_xp_r(reg) e##reg
- # define _DECOR_ip_r(reg) e##reg
- # define _DECOR_mem_r(addr) dword ptr addr
- # define _DECOR_imm_r(imm) dword imm
- #endif
- #if CPUFAM_AMD64
- # define _DECOR_abcd_r(reg) r##reg##x
- # define _DECOR_xp_r(reg) r##reg
- # define _DECOR_ip_r(reg) r##reg
- # define _DECOR_mem_r(addr) qword ptr addr
- # define _DECOR_imm_r(imm) qword imm
- #endif
-
- // R_r(decor) applies decoration decor to register r, which is an internal
- // register name. The internal register names are: `ip', `a', `b', `c', `d',
- // `si', `di', `bp', `sp', `r8'--`r15'.
- //
- // Each macro simply calls _DECOR with the register's type. R_nil ignores
- // its argument and always yields `nil'.
- #define R_nil(decor) nil
- #define R_ip(decor) _DECOR(ip, decor, ip)
- #define R_a(decor) _DECOR(abcd, decor, a)
- #define R_b(decor) _DECOR(abcd, decor, b)
- #define R_c(decor) _DECOR(abcd, decor, c)
- #define R_d(decor) _DECOR(abcd, decor, d)
- #define R_si(decor) _DECOR(xp, decor, si)
- #define R_di(decor) _DECOR(xp, decor, di)
- #define R_bp(decor) _DECOR(xp, decor, bp)
- #define R_sp(decor) _DECOR(xp, decor, sp)
- #if CPUFAM_AMD64
- # define R_r8(decor) _DECOR(rn, decor, r8)
- # define R_r9(decor) _DECOR(rn, decor, r9)
- # define R_r10(decor) _DECOR(rn, decor, r10)
- # define R_r11(decor) _DECOR(rn, decor, r11)
- # define R_r12(decor) _DECOR(rn, decor, r12)
- # define R_r13(decor) _DECOR(rn, decor, r13)
- # define R_r14(decor) _DECOR(rn, decor, r14)
- # define R_r15(decor) _DECOR(rn, decor, r15)
- #endif
-
- // Refer to an in-memory datum of the type implied by decor residing at
- // address addr (which should supply its own square-brackets).
- #define MEM(decor, addr) _DECOR(mem, decor, addr)
-
- // Refer to an immediate datum of the type implied by decor.
- //
- // This dispatches to the `_DECOR_imm_...' macros, which write a bare size
- // keyword (`byte', `dword', ...) in front of the value rather than the
- // `... ptr' form used for memory operands. The parameter is deliberately
- // not called `imm': that would also replace the type name passed to _DECOR.
- #define IMM(decor, val) _DECOR(imm, decor, val)
-
- // Applies decoration decor to assembler-level register name reg.
- //
- // The register name is pasted onto `_REGFORM_' to select one of the macros
- // below, which is then called with decor.
- #define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
-
- // Internal macros: _REGFORM_r(decor) applies decoration decor to an
- // assembler-level register name, in place of any decoration that register
- // name has already.
- //
- // Every spelling of a register maps to the same `R_...' macro, so the
- // width of the name passed in is irrelevant.
-
- #define _REGFORM_nil(decor) R_nil(decor)
-
- #define _REGFORM_ip(decor) R_ip(decor)
- #define _REGFORM_eip(decor) R_ip(decor)
-
- #define _REGFORM_a(decor) R_a(decor)
- #define _REGFORM_al(decor) R_a(decor)
- #define _REGFORM_ah(decor) R_a(decor)
- #define _REGFORM_ax(decor) R_a(decor)
- #define _REGFORM_eax(decor) R_a(decor)
-
- #define _REGFORM_b(decor) R_b(decor)
- #define _REGFORM_bl(decor) R_b(decor)
- #define _REGFORM_bh(decor) R_b(decor)
- #define _REGFORM_bx(decor) R_b(decor)
- #define _REGFORM_ebx(decor) R_b(decor)
-
- #define _REGFORM_c(decor) R_c(decor)
- #define _REGFORM_cl(decor) R_c(decor)
- #define _REGFORM_ch(decor) R_c(decor)
- #define _REGFORM_cx(decor) R_c(decor)
- #define _REGFORM_ecx(decor) R_c(decor)
-
- #define _REGFORM_d(decor) R_d(decor)
- #define _REGFORM_dl(decor) R_d(decor)
- #define _REGFORM_dh(decor) R_d(decor)
- #define _REGFORM_dx(decor) R_d(decor)
- #define _REGFORM_edx(decor) R_d(decor)
-
- #define _REGFORM_si(decor) R_si(decor)
- #define _REGFORM_sil(decor) R_si(decor)
- #define _REGFORM_esi(decor) R_si(decor)
-
- #define _REGFORM_di(decor) R_di(decor)
- #define _REGFORM_dil(decor) R_di(decor)
- #define _REGFORM_edi(decor) R_di(decor)
-
- #define _REGFORM_bp(decor) R_bp(decor)
- #define _REGFORM_bpl(decor) R_bp(decor)
- #define _REGFORM_ebp(decor) R_bp(decor)
-
- #define _REGFORM_sp(decor) R_sp(decor)
- #define _REGFORM_spl(decor) R_sp(decor)
- #define _REGFORM_esp(decor) R_sp(decor)
-
- // The 64-bit spellings and the numbered registers exist only on amd64.
- #if CPUFAM_AMD64
-
- # define _REGFORM_rip(decor) R_ip(decor)
- # define _REGFORM_rsp(decor) R_sp(decor)
- # define _REGFORM_rbp(decor) R_bp(decor)
- # define _REGFORM_rdi(decor) R_di(decor)
- # define _REGFORM_rsi(decor) R_si(decor)
- # define _REGFORM_rdx(decor) R_d(decor)
- # define _REGFORM_rcx(decor) R_c(decor)
- # define _REGFORM_rbx(decor) R_b(decor)
- # define _REGFORM_rax(decor) R_a(decor)
-
- # define _REGFORM_r8(decor) R_r8(decor)
- # define _REGFORM_r8b(decor) R_r8(decor)
- # define _REGFORM_r8w(decor) R_r8(decor)
- # define _REGFORM_r8d(decor) R_r8(decor)
-
- # define _REGFORM_r9(decor) R_r9(decor)
- # define _REGFORM_r9b(decor) R_r9(decor)
- # define _REGFORM_r9w(decor) R_r9(decor)
- # define _REGFORM_r9d(decor) R_r9(decor)
-
- # define _REGFORM_r10(decor) R_r10(decor)
- # define _REGFORM_r10b(decor) R_r10(decor)
- # define _REGFORM_r10w(decor) R_r10(decor)
- # define _REGFORM_r10d(decor) R_r10(decor)
-
- # define _REGFORM_r11(decor) R_r11(decor)
- # define _REGFORM_r11b(decor) R_r11(decor)
- # define _REGFORM_r11w(decor) R_r11(decor)
- # define _REGFORM_r11d(decor) R_r11(decor)
-
- # define _REGFORM_r12(decor) R_r12(decor)
- # define _REGFORM_r12b(decor) R_r12(decor)
- # define _REGFORM_r12w(decor) R_r12(decor)
- # define _REGFORM_r12d(decor) R_r12(decor)
-
- # define _REGFORM_r13(decor) R_r13(decor)
- # define _REGFORM_r13b(decor) R_r13(decor)
- # define _REGFORM_r13w(decor) R_r13(decor)
- # define _REGFORM_r13d(decor) R_r13(decor)
-
- # define _REGFORM_r14(decor) R_r14(decor)
- # define _REGFORM_r14b(decor) R_r14(decor)
- # define _REGFORM_r14w(decor) R_r14(decor)
- # define _REGFORM_r14d(decor) R_r14(decor)
-
- # define _REGFORM_r15(decor) R_r15(decor)
- # define _REGFORM_r15b(decor) R_r15(decor)
- # define _REGFORM_r15w(decor) R_r15(decor)
- # define _REGFORM_r15d(decor) R_r15(decor)
-
- #endif
-
- // Macros for converting register names.
- //
- // Each takes any spelling of a register known to _REGFORM and yields the
- // name of the requested piece of it; e.g., DWORD(al) is `eax'. QWORD is
- // only available on amd64; WHOLE gives the natural-width register.
- #define BYTE(reg) _REGFORM(reg, b)
- #define HIBYTE(reg) _REGFORM(reg, h)
- #define WORD(reg) _REGFORM(reg, w)
- #define DWORD(reg) _REGFORM(reg, d)
- #if CPUFAM_AMD64
- # define QWORD(reg) _REGFORM(reg, q)
- #endif
- #define WHOLE(reg) _REGFORM(reg, r)
-
- // Macros for some common registers.
- //
- // These are the whole-register (`r' decoration) forms, so AX is `eax' on
- // x86 and `rax' on amd64, and similarly for the others.
- #define AX R_a(r)
- #define BX R_b(r)
- #define CX R_c(r)
- #define DX R_d(r)
- #define SI R_si(r)
- #define DI R_di(r)
- #define BP R_bp(r)
- #define SP R_sp(r)
-
- // Stack management and unwinding.
-
- // setfp [FP], [OFFSET]: establish FP (default BP) as a frame pointer holding
- // SP + OFFSET (default 0), and record that in the unwinding data: the CFA
- // register (and offset) on ELF, `.seh_setframe' on amd64 Windows. Also set
- // .L$_frameptr_p and define a `dropfp' macro which undoes this with the same
- // FP and OFFSET.
- .macro setfp fp=BP, offset=0
- .if \offset == 0
- mov \fp, SP
- #if __ELF__
- .cfi_def_cfa_register \fp
- #endif
- #if ABI_WIN && CPUFAM_AMD64
- .seh_setframe \fp, 0
- #endif
- .else
- lea \fp, [SP + \offset]
- #if __ELF__
- .cfi_def_cfa_register \fp
- .cfi_adjust_cfa_offset -\offset
- #endif
- #if ABI_WIN && CPUFAM_AMD64
- .seh_setframe \fp, \offset
- #endif
- .endif
- .L$_frameptr_p = -1
- .macro dropfp; _dropfp \fp, \offset; .endm
- .endm
-
- // _dropfp FP, [OFFSET]: the body of the `dropfp' macro defined by `setfp'.
- // Restore SP to FP - OFFSET, make SP the CFA register again on ELF
- // (readjusting the CFA offset if OFFSET is nonzero), clear .L$_frameptr_p,
- // and purge `dropfp'.
- .macro _dropfp fp, offset=0
- .if \offset == 0
- mov SP, \fp
- #if __ELF__
- .cfi_def_cfa_register SP
- #endif
- .else
- lea SP, [\fp - \offset]
- #if __ELF__
- .cfi_def_cfa_register SP
- .cfi_adjust_cfa_offset +\offset
- #endif
- .endif
- .L$_frameptr_p = 0
- .purgem dropfp
- .endm
-
- // stalloc N: allocate N bytes of stack by subtracting N from SP, recording
- // the change as a CFA adjustment on ELF and with `.seh_stackalloc' on amd64
- // Windows.
- .macro stalloc n
- sub SP, \n
- #if __ELF__
- .cfi_adjust_cfa_offset +\n
- #endif
- #if ABI_WIN && CPUFAM_AMD64
- .seh_stackalloc \n
- #endif
- .endm
-
- // stfree N: release N bytes of stack by adding N to SP, with the matching
- // CFA adjustment on ELF. No Windows unwind directive is emitted here.
- .macro stfree n
- add SP, \n
- #if __ELF__
- .cfi_adjust_cfa_offset -\n
- #endif
- .endm
-
- // pushreg R: push register R. On ELF, grow the CFA offset by WORDSZ and
- // note R as saved at offset 0; on amd64 Windows, emit `.seh_pushreg'.
- .macro pushreg r
- push \r
- #if __ELF__
- .cfi_adjust_cfa_offset +WORDSZ
- .cfi_rel_offset \r, 0
- #endif
- #if ABI_WIN && CPUFAM_AMD64
- .seh_pushreg \r
- #endif
- .endm
-
- // popreg R: pop register R. On ELF, shrink the CFA offset by WORDSZ and
- // mark R as restored.
- .macro popreg r
- pop \r
- #if __ELF__
- .cfi_adjust_cfa_offset -WORDSZ
- .cfi_restore \r
- #endif
- .endm
-
- // savexmm R, OFFSET: store XMM register R at SP + OFFSET using `movdqa'
- // (so the slot must be suitably aligned for that instruction), and record
- // the save with `.seh_savexmm' on amd64 Windows.
- .macro savexmm r, offset
- movdqa [SP + \offset], \r
- #if ABI_WIN && CPUFAM_AMD64
- .seh_savexmm \r, \offset
- #endif
- .endm
-
- // rstrxmm R, OFFSET: reload XMM register R from SP + OFFSET; the inverse
- // of `savexmm'. No unwind directive is emitted.
- .macro rstrxmm r, offset
- movdqa \r, [SP + \offset]
- .endm
-
- // endprologue: mark the end of a function's prologue. Emits
- // `.seh_endprologue' on amd64 Windows, and sets .L$_prologue_p, which
- // _ENDFUNC checks in order to diagnose a missing `endprologue'.
- .macro endprologue
- #if ABI_WIN && CPUFAM_AMD64
- .seh_endprologue
- #endif
- .L$_prologue_p = -1
- .endm
-
- #endif
-
- ///--------------------------------------------------------------------------
- /// ARM-specific hacking.
-
- #if CPUFAM_ARMEL
-
- // ARM/Thumb mode things. Use ARM by default.
- //
- // As well as selecting the instruction set, each macro sets .L$_pcoff (8 for
- // ARM, 4 for Thumb), which the PIC macros below subtract when computing
- // pc-relative offsets.
- #define ARM .arm; .L$_pcoff = 8
- #define THUMB .thumb; .L$_pcoff = 4
- ARM
-
- // Set the function hooks.
- //
- // Functions are 4-byte aligned and bracketed by `.fnstart' and `.fnend';
- // a literal pool is dumped (`.ltorg') after each one.
- #define FUNC_PREHOOK(_) .balign 4; .fnstart
- #define ENDFUNC_HOOK(_) .fnend; .ltorg
-
- // Call external subroutine at ADDR, possibly via PLT.
- //
- // COND is an optional condition-code suffix for the `bl' instruction. The
- // `(PLT)' form is used exactly when WANT_PIC is set.
- .macro callext addr, cond=
- #if WANT_PIC
- bl\cond \addr(PLT)
- #else
- bl\cond \addr
- #endif
- .endm
-
- // Do I need to arrange a spare GOT register?
- //
- // On ARM, NEED_GOT is set whenever building PIC. GOTREG is the default for
- // the `got' argument of `ldgot' and `leaext'.
- #if WANT_PIC
- # define NEED_GOT 1
- #endif
- #define GOTREG r9
-
- // Maybe load GOT address into GOT.
- //
- // This assembles to nothing unless building PIC. Otherwise it loads a
- // literal holding the distance from the `add' instruction's pc value to
- // _GLOBAL_OFFSET_TABLE_ and adds pc to it. The literal is placed in the
- // literal subsection (see _LIT); .L$_pcoff accounts for how far ahead of the
- // instruction pc reads in the current instruction set. COND is an optional
- // condition-code suffix applied to both instructions.
- .macro ldgot cond=, got=GOTREG
- #if WANT_PIC
- ldr\cond \got, .L$_ldgot$\@
- .L$_ldgot_pc$\@:
- add\cond \got, pc, \got
- _LIT
- .balign 4
- .L$_ldgot$\@:
- .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
- _ENDLIT
- #endif
- .endm
-
- // Load address of external symbol ADDR into REG, maybe using GOT.
- //
- // When building PIC, REG is first loaded with the `ADDR(GOT)' literal and
- // then used as an index from the GOT register to fetch the address, so GOT
- // must already hold the GOT address (see `ldgot'). Otherwise the address is
- // loaded with the `ldr REG, =ADDR' pseudo-instruction. COND is an optional
- // condition-code suffix.
- .macro leaext reg, addr, cond=, got=GOTREG
- #if WANT_PIC
- ldr\cond \reg, .L$_leaext$\@
- ldr\cond \reg, [\got, \reg]
- _LIT
- .balign 4
- .L$_leaext$\@:
- .word \addr(GOT)
- _ENDLIT
- #else
- ldr\cond \reg, =\addr
- #endif
- .endm
-
- // Load address of external symbol ADDR into REG directly.
- //
- // Unlike `leaext', this needs no GOT register: when building PIC it loads a
- // pc-relative `ADDR(GOT_PREL)' literal and fetches the address relative to
- // pc. When .L$_pcoff is 8 (ARM mode) that takes a single pc-indexed load;
- // otherwise pc is added to REG first and the load is done through REG.
- // Without PIC this is the same as the non-PIC `leaext'. COND is an optional
- // condition-code suffix.
- .macro leaextq reg, addr, cond=
- #if WANT_PIC
- ldr\cond \reg, .L$_leaextq$\@
- .L$_leaextq_pc$\@:
- .if .L$_pcoff == 8
- ldr\cond \reg, [pc, \reg]
- .else
- add\cond \reg, pc
- ldr\cond \reg, [\reg]
- .endif
- _LIT
- .balign 4
- .L$_leaextq$\@:
- .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
- _ENDLIT
- #else
- ldr\cond \reg, =\addr
- #endif
- .endm
-
- .macro vzero vz=q15
- // Set VZ (default q15) to zero.
- //
- // `vshl128' and `vshr128' assume by default that q15 has been zeroed like
- // this.
- vmov.u32 \vz, #0
- .endm
-
- .macro vshl128 vd, vn, nbit, vz=q15
- // Set VD to VN shifted left by NBIT. Assume VZ (default q15) is
- // all-bits-zero. NBIT must be a multiple of 8.
- //
- // The shift is done a byte at a time with `vext', and NBIT >> 3 discards the
- // low three bits, so reject any NBIT which has one of those bits set.
- .if \nbit&7 != 0
- .error "shift quantity must be whole number of bytes"
- .endif
- vext.8 \vd, \vz, \vn, #16 - (\nbit >> 3)
- .endm
-
- .macro vshr128 vd, vn, nbit, vz=q15
- // Set VD to VN shifted right by NBIT. Assume VZ (default q15) is
- // all-bits-zero. NBIT must be a multiple of 8.
- //
- // The shift is done a byte at a time with `vext', and NBIT >> 3 discards the
- // low three bits, so reject any NBIT which has one of those bits set.
- .if \nbit&7 != 0
- .error "shift quantity must be whole number of bytes"
- .endif
- vext.8 \vd, \vn, \vz, #\nbit >> 3
- .endm
-
- // Apply decoration decor to register name reg.
- //
- // The register name is pasted onto `_REGFORM_' to select one of the macros
- // below, which is then called with decor.
- #define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
-
- // Internal macros: `_REGFORM_r(decor)' applies decoration decor to register
- // name r.
- //
- // Each one splits the register name into its type letter (`s', `d' or `q')
- // and its number, and hands both to _DECOR. `nil' always maps to `nil'.
-
- #define _REGFORM_nil(decor) nil
-
- #define _REGFORM_s0(decor) _DECOR(s, decor, 0)
- #define _REGFORM_s1(decor) _DECOR(s, decor, 1)
- #define _REGFORM_s2(decor) _DECOR(s, decor, 2)
- #define _REGFORM_s3(decor) _DECOR(s, decor, 3)
- #define _REGFORM_s4(decor) _DECOR(s, decor, 4)
- #define _REGFORM_s5(decor) _DECOR(s, decor, 5)
- #define _REGFORM_s6(decor) _DECOR(s, decor, 6)
- #define _REGFORM_s7(decor) _DECOR(s, decor, 7)
- #define _REGFORM_s8(decor) _DECOR(s, decor, 8)
- #define _REGFORM_s9(decor) _DECOR(s, decor, 9)
- #define _REGFORM_s10(decor) _DECOR(s, decor, 10)
- #define _REGFORM_s11(decor) _DECOR(s, decor, 11)
- #define _REGFORM_s12(decor) _DECOR(s, decor, 12)
- #define _REGFORM_s13(decor) _DECOR(s, decor, 13)
- #define _REGFORM_s14(decor) _DECOR(s, decor, 14)
- #define _REGFORM_s15(decor) _DECOR(s, decor, 15)
- #define _REGFORM_s16(decor) _DECOR(s, decor, 16)
- #define _REGFORM_s17(decor) _DECOR(s, decor, 17)
- #define _REGFORM_s18(decor) _DECOR(s, decor, 18)
- #define _REGFORM_s19(decor) _DECOR(s, decor, 19)
- #define _REGFORM_s20(decor) _DECOR(s, decor, 20)
- #define _REGFORM_s21(decor) _DECOR(s, decor, 21)
- #define _REGFORM_s22(decor) _DECOR(s, decor, 22)
- #define _REGFORM_s23(decor) _DECOR(s, decor, 23)
- #define _REGFORM_s24(decor) _DECOR(s, decor, 24)
- #define _REGFORM_s25(decor) _DECOR(s, decor, 25)
- #define _REGFORM_s26(decor) _DECOR(s, decor, 26)
- #define _REGFORM_s27(decor) _DECOR(s, decor, 27)
- #define _REGFORM_s28(decor) _DECOR(s, decor, 28)
- #define _REGFORM_s29(decor) _DECOR(s, decor, 29)
- #define _REGFORM_s30(decor) _DECOR(s, decor, 30)
- #define _REGFORM_s31(decor) _DECOR(s, decor, 31)
-
- #define _REGFORM_d0(decor) _DECOR(d, decor, 0)
- #define _REGFORM_d1(decor) _DECOR(d, decor, 1)
- #define _REGFORM_d2(decor) _DECOR(d, decor, 2)
- #define _REGFORM_d3(decor) _DECOR(d, decor, 3)
- #define _REGFORM_d4(decor) _DECOR(d, decor, 4)
- #define _REGFORM_d5(decor) _DECOR(d, decor, 5)
- #define _REGFORM_d6(decor) _DECOR(d, decor, 6)
- #define _REGFORM_d7(decor) _DECOR(d, decor, 7)
- #define _REGFORM_d8(decor) _DECOR(d, decor, 8)
- #define _REGFORM_d9(decor) _DECOR(d, decor, 9)
- #define _REGFORM_d10(decor) _DECOR(d, decor, 10)
- #define _REGFORM_d11(decor) _DECOR(d, decor, 11)
- #define _REGFORM_d12(decor) _DECOR(d, decor, 12)
- #define _REGFORM_d13(decor) _DECOR(d, decor, 13)
- #define _REGFORM_d14(decor) _DECOR(d, decor, 14)
- #define _REGFORM_d15(decor) _DECOR(d, decor, 15)
- #define _REGFORM_d16(decor) _DECOR(d, decor, 16)
- #define _REGFORM_d17(decor) _DECOR(d, decor, 17)
- #define _REGFORM_d18(decor) _DECOR(d, decor, 18)
- #define _REGFORM_d19(decor) _DECOR(d, decor, 19)
- #define _REGFORM_d20(decor) _DECOR(d, decor, 20)
- #define _REGFORM_d21(decor) _DECOR(d, decor, 21)
- #define _REGFORM_d22(decor) _DECOR(d, decor, 22)
- #define _REGFORM_d23(decor) _DECOR(d, decor, 23)
- #define _REGFORM_d24(decor) _DECOR(d, decor, 24)
- #define _REGFORM_d25(decor) _DECOR(d, decor, 25)
- #define _REGFORM_d26(decor) _DECOR(d, decor, 26)
- #define _REGFORM_d27(decor) _DECOR(d, decor, 27)
- #define _REGFORM_d28(decor) _DECOR(d, decor, 28)
- #define _REGFORM_d29(decor) _DECOR(d, decor, 29)
- #define _REGFORM_d30(decor) _DECOR(d, decor, 30)
- #define _REGFORM_d31(decor) _DECOR(d, decor, 31)
-
- #define _REGFORM_q0(decor) _DECOR(q, decor, 0)
- #define _REGFORM_q1(decor) _DECOR(q, decor, 1)
- #define _REGFORM_q2(decor) _DECOR(q, decor, 2)
- #define _REGFORM_q3(decor) _DECOR(q, decor, 3)
- #define _REGFORM_q4(decor) _DECOR(q, decor, 4)
- #define _REGFORM_q5(decor) _DECOR(q, decor, 5)
- #define _REGFORM_q6(decor) _DECOR(q, decor, 6)
- #define _REGFORM_q7(decor) _DECOR(q, decor, 7)
- #define _REGFORM_q8(decor) _DECOR(q, decor, 8)
- #define _REGFORM_q9(decor) _DECOR(q, decor, 9)
- #define _REGFORM_q10(decor) _DECOR(q, decor, 10)
- #define _REGFORM_q11(decor) _DECOR(q, decor, 11)
- #define _REGFORM_q12(decor) _DECOR(q, decor, 12)
- #define _REGFORM_q13(decor) _DECOR(q, decor, 13)
- #define _REGFORM_q14(decor) _DECOR(q, decor, 14)
- #define _REGFORM_q15(decor) _DECOR(q, decor, 15)
-
- // `_LOPART(n)' and `_HIPART(n)' return the numbers of the register halves of
- // register n, i.e., 2*n and 2*n + 1 respectively.
- //
- // The arithmetic is done by table lookup, so n must expand to one of the
- // literal numbers 0--15 listed below.
- #define _LOPART(n) _GLUE(_LOPART_, n)
- #define _HIPART(n) _GLUE(_HIPART_, n)
-
- // Internal macros: `_LOPART_n' and `_HIPART_n' return the numbers of the
- // register halves of register n, i.e., 2*n and 2*n + 1 respectively.
-
- #define _LOPART_0 0
- #define _HIPART_0 1
- #define _LOPART_1 2
- #define _HIPART_1 3
- #define _LOPART_2 4
- #define _HIPART_2 5
- #define _LOPART_3 6
- #define _HIPART_3 7
- #define _LOPART_4 8
- #define _HIPART_4 9
- #define _LOPART_5 10
- #define _HIPART_5 11
- #define _LOPART_6 12
- #define _HIPART_6 13
- #define _LOPART_7 14
- #define _HIPART_7 15
- #define _LOPART_8 16
- #define _HIPART_8 17
- #define _LOPART_9 18
- #define _HIPART_9 19
- #define _LOPART_10 20
- #define _HIPART_10 21
- #define _LOPART_11 22
- #define _HIPART_11 23
- #define _LOPART_12 24
- #define _HIPART_12 25
- #define _LOPART_13 26
- #define _HIPART_13 27
- #define _LOPART_14 28
- #define _HIPART_14 29
- #define _LOPART_15 30
- #define _HIPART_15 31
-
- // Return the register number of the pair containing register n, i.e.,
- // floor(n/2).
- //
- // This too is a table lookup; n must expand to one of the literal numbers
- // 0--31.
- #define _PAIR(n) _GLUE(_PAIR_, n)
-
- // Internal macros: `_PAIR_n' returns the register number of the pair
- // containing register n, i.e., floor(n/2).
- #define _PAIR_0 0
- #define _PAIR_1 0
- #define _PAIR_2 1
- #define _PAIR_3 1
- #define _PAIR_4 2
- #define _PAIR_5 2
- #define _PAIR_6 3
- #define _PAIR_7 3
- #define _PAIR_8 4
- #define _PAIR_9 4
- #define _PAIR_10 5
- #define _PAIR_11 5
- #define _PAIR_12 6
- #define _PAIR_13 6
- #define _PAIR_14 7
- #define _PAIR_15 7
- #define _PAIR_16 8
- #define _PAIR_17 8
- #define _PAIR_18 9
- #define _PAIR_19 9
- #define _PAIR_20 10
- #define _PAIR_21 10
- #define _PAIR_22 11
- #define _PAIR_23 11
- #define _PAIR_24 12
- #define _PAIR_25 12
- #define _PAIR_26 13
- #define _PAIR_27 13
- #define _PAIR_28 14
- #define _PAIR_29 14
- #define _PAIR_30 15
- #define _PAIR_31 15
-
- // Apply decoration decor to register number n of type ty. Decorations are
- // as follows.
- //
- // decor types meaning
- // Q s, d the NEON qN register containing this one
- // D s the NEON dN register containing this one
- // D0 q the low 64-bit half of this one
- // D1 q the high 64-bit half of this one
- // S0 d, q the first 32-bit piece of this one
- // S1 d, q the second 32-bit piece of this one
- // S2 q the third 32-bit piece of this one
- // S3 q the fourth 32-bit piece of this one
- // Bn q the nth byte of this register, as a scalar
- // Hn q the nth halfword of this register, as a scalar
- // Wn q the nth word of this register, as a scalar
- //
- // The work is done by token-pasting: the result is whatever the macro
- // `_DECOR_ty_decor' makes of n.
- #define _DECOR(ty, decor, n) _DECOR_##ty##_##decor(n)
-
- // Internal macros: `_DECOR_ty_decor(n)' applies decoration decor to register
- // number n of type ty.
-
- // An `s' register: the enclosing d register is number floor(n/2), and the
- // enclosing q register is number floor(n/4).
- #define _DECOR_s_Q(n) GLUE(q, _PAIR(_PAIR(n)))
- #define _DECOR_s_D(n) GLUE(d, _PAIR(n))
-
- // A `d' register: the enclosing q register is number floor(n/2); its two
- // 32-bit pieces are s registers 2*n (first) and 2*n + 1 (second).
- #define _DECOR_d_Q(n) GLUE(q, _PAIR(n))
- #define _DECOR_d_S0(n) GLUE(s, _LOPART(n))
- #define _DECOR_d_S1(n) GLUE(s, _HIPART(n))
-
- // A `q' register: its halves are d registers 2*n and 2*n + 1, and its
- // quarters are found by halving those in turn. The scalar forms (Wi, Hi,
- // Bi) pick the d register containing the element and index within it: two
- // words, four halfwords, or eight bytes per d register.
- #define _DECOR_q_D0(n) GLUE(d, _LOPART(n))
- #define _DECOR_q_D1(n) GLUE(d, _HIPART(n))
- #define _DECOR_q_S0(n) GLUE(s, _LOPART(_LOPART(n)))
- #define _DECOR_q_S1(n) GLUE(s, _HIPART(_LOPART(n)))
- #define _DECOR_q_S2(n) GLUE(s, _LOPART(_HIPART(n)))
- #define _DECOR_q_S3(n) GLUE(s, _HIPART(_HIPART(n)))
- #define _DECOR_q_W0(n) GLUE(d, _LOPART(n))[0]
- #define _DECOR_q_W1(n) GLUE(d, _LOPART(n))[1]
- #define _DECOR_q_W2(n) GLUE(d, _HIPART(n))[0]
- #define _DECOR_q_W3(n) GLUE(d, _HIPART(n))[1]
- #define _DECOR_q_H0(n) GLUE(d, _LOPART(n))[0]
- #define _DECOR_q_H1(n) GLUE(d, _LOPART(n))[1]
- #define _DECOR_q_H2(n) GLUE(d, _LOPART(n))[2]
- #define _DECOR_q_H3(n) GLUE(d, _LOPART(n))[3]
- #define _DECOR_q_H4(n) GLUE(d, _HIPART(n))[0]
- #define _DECOR_q_H5(n) GLUE(d, _HIPART(n))[1]
- #define _DECOR_q_H6(n) GLUE(d, _HIPART(n))[2]
- #define _DECOR_q_H7(n) GLUE(d, _HIPART(n))[3]
- #define _DECOR_q_B0(n) GLUE(d, _LOPART(n))[0]
- #define _DECOR_q_B1(n) GLUE(d, _LOPART(n))[1]
- #define _DECOR_q_B2(n) GLUE(d, _LOPART(n))[2]
- #define _DECOR_q_B3(n) GLUE(d, _LOPART(n))[3]
- #define _DECOR_q_B4(n) GLUE(d, _LOPART(n))[4]
- #define _DECOR_q_B5(n) GLUE(d, _LOPART(n))[5]
- #define _DECOR_q_B6(n) GLUE(d, _LOPART(n))[6]
- #define _DECOR_q_B7(n) GLUE(d, _LOPART(n))[7]
- #define _DECOR_q_B8(n) GLUE(d, _HIPART(n))[0]
- #define _DECOR_q_B9(n) GLUE(d, _HIPART(n))[1]
- #define _DECOR_q_B10(n) GLUE(d, _HIPART(n))[2]
- #define _DECOR_q_B11(n) GLUE(d, _HIPART(n))[3]
- #define _DECOR_q_B12(n) GLUE(d, _HIPART(n))[4]
- #define _DECOR_q_B13(n) GLUE(d, _HIPART(n))[5]
- #define _DECOR_q_B14(n) GLUE(d, _HIPART(n))[6]
- #define _DECOR_q_B15(n) GLUE(d, _HIPART(n))[7]
-
- // Macros for navigating the NEON register hierarchy.
- //
- // Each takes a register name known to _REGFORM and applies the decoration
- // of the same name; see _DECOR for which register types accept which.
- #define S0(reg) _REGFORM(reg, S0)
- #define S1(reg) _REGFORM(reg, S1)
- #define S2(reg) _REGFORM(reg, S2)
- #define S3(reg) _REGFORM(reg, S3)
- #define D(reg) _REGFORM(reg, D)
- #define D0(reg) _REGFORM(reg, D0)
- #define D1(reg) _REGFORM(reg, D1)
- #define Q(reg) _REGFORM(reg, Q)
-
- // Macros for indexing quadword registers.
- //
- // QB, QH and QW select byte, halfword or word number i of q register reg.
- // The index is pasted onto the decoration name, so i must be a literal
- // number.
- #define QB(reg, i) _REGFORM(reg, B##i)
- #define QH(reg, i) _REGFORM(reg, H##i)
- #define QW(reg, i) _REGFORM(reg, W##i)
-
- // Macros for converting vldm/vstm ranges.
- //
- // QQ(qlo, qhi) writes the q-register range as a d-register range, from the
- // low half of qlo to the high half of qhi.
- #define QQ(qlo, qhi) D0(qlo)-D1(qhi)
-
- // Stack management and unwinding.
-
- // setfp [FP], [OFFSET]: establish FP (default r11) as a frame pointer
- // holding sp + OFFSET (default 0), and record that with a `.setfp'
- // directive. Also define a `dropfp' macro which undoes this with the same
- // FP and OFFSET, and set .L$_frameptr_p.
- .macro setfp fp=r11, offset=0
- .if \offset == 0
- mov \fp, sp
- .setfp \fp, sp
- .else
- add \fp, sp, #\offset
- .setfp \fp, sp, #\offset
- .endif
- .macro dropfp; _dropfp \fp, \offset; .endm
- .L$_frameptr_p = -1
- .endm
-
- // _dropfp FP, [OFFSET]: the body of the `dropfp' macro defined by `setfp'.
- // Restore sp to FP - OFFSET, purge `dropfp', and clear .L$_frameptr_p.
- .macro _dropfp fp, offset=0
- .if \offset == 0
- mov sp, \fp
- .else
- sub sp, \fp, #\offset
- .endif
- .purgem dropfp
- .L$_frameptr_p = 0
- .endm
-
- // stalloc N: allocate N bytes of stack, recording the change with `.pad'.
- // N is written as an immediate operand.
- .macro stalloc n
- sub sp, sp, #\n
- .pad #\n
- .endm
-
- // stfree N: release N bytes of stack, recording the change with a negative
- // `.pad'.
- .macro stfree n
- add sp, sp, #\n
- .pad #-\n
- .endm
-
- // pushreg RR...: push the listed core registers (everything after the
- // macro name goes between the braces) and record them with `.save'.
- .macro pushreg rr:vararg
- push {\rr}
- .save {\rr}
- .endm
-
- // popreg RR...: pop the listed core registers. No unwind directive is
- // emitted.
- .macro popreg rr:vararg
- pop {\rr}
- .endm
-
- // pushvfp RR...: store the listed VFP registers below sp with writeback,
- // and record them with `.vsave'.
- .macro pushvfp rr:vararg
- vstmdb sp!, {\rr}
- .vsave {\rr}
- .endm
-
- // popvfp RR...: reload the listed VFP registers from sp with writeback;
- // the inverse of `pushvfp'. No unwind directive is emitted.
- .macro popvfp rr:vararg
- vldmia sp!, {\rr}
- .endm
-
- // endprologue: accepted for source compatibility with the other
- // architectures, but assembles to nothing here.
- .macro endprologue
- .endm
-
- // No need for prologue markers on ARM.
- //
- // Instead, the post-label hook sets .L$_prologue_p immediately, so that the
- // check in _ENDFUNC is always satisfied.
- #define FUNC_POSTHOOK(_) .L$_prologue_p = -1
-
- #endif
-
- ///--------------------------------------------------------------------------
- /// AArch64-specific hacking.
-
- #if CPUFAM_ARM64
-
- // Set the function hooks.
- //
- // Functions are 4-byte aligned and bracketed by CFI start/end directives.
- // The post-label hook also sets .L$_prologue_p at once, so `endprologue' is
- // not needed to satisfy the check in _ENDFUNC.
- #define FUNC_PREHOOK(_) .balign 4
- #define FUNC_POSTHOOK(_) .cfi_startproc; .L$_prologue_p = -1
- #define ENDFUNC_HOOK(_) .cfi_endproc
-
- // Call external subroutine at ADDR, possibly via PLT.
- //
- // This is a plain `bl' whether or not WANT_PIC is set; no explicit PLT
- // annotation is written.
- .macro callext addr
- bl \addr
- .endm
-
- // Load address of external symbol ADDR into REG.
- //
- // When building PIC, the address is loaded from ADDR's GOT entry, which is
- // located with an `adrp'/`:got_lo12:' pair. Otherwise it is computed
- // directly with `adrp' and an `add' of the `:lo12:' part.
- .macro leaext reg, addr
- #if WANT_PIC
- adrp \reg, :got:\addr
- ldr \reg, [\reg, #:got_lo12:\addr]
- #else
- adrp \reg, \addr
- add \reg, \reg, #:lo12:\addr
- #endif
- .endm
-
- .macro vzero vz=v31
- // Set VZ (default v31) to zero.
- //
- // VZ is given without an arrangement suffix; `.4s' is appended here.
- // `vshl128' and `vshr128' assume by default that v31 has been zeroed like
- // this.
- dup \vz\().4s, wzr
- .endm
-
- .macro vshl128 vd, vn, nbit, vz=v31
- // Set VD to VN shifted left by NBIT. Assume VZ (default v31) is
- // all-bits-zero. NBIT must be a multiple of 8.
- //
- // The shift is done a byte at a time with `ext', and NBIT >> 3 discards the
- // low three bits, so reject any NBIT which has one of those bits set.
- // Register names are given without an arrangement suffix.
- .if \nbit&7 != 0
- .error "shift quantity must be whole number of bytes"
- .endif
- ext \vd\().16b, \vz\().16b, \vn\().16b, #16 - (\nbit >> 3)
- .endm
-
- .macro vshr128 vd, vn, nbit, vz=v31
- // Set VD to VN shifted right by NBIT. Assume VZ (default v31) is
- // all-bits-zero. NBIT must be a multiple of 8.
- //
- // The shift is done a byte at a time with `ext', and NBIT >> 3 discards the
- // low three bits, so reject any NBIT which has one of those bits set.
- // Register names are given without an arrangement suffix.
- .if \nbit&7 != 0
- .error "shift quantity must be whole number of bytes"
- .endif
- ext \vd\().16b, \vn\().16b, \vz\().16b, #\nbit >> 3
- .endm
-
- // Stack management and unwinding.
- .macro setfp fp=x29, offset=0
- // Make FP (default x29) a frame pointer holding sp + OFFSET (default 0),
- // and describe it to the unwinder; also arm `dropfp' to undo this.
- //
- // If you're just going through the motions with a fixed-size stack frame,
- // then you want to say `add x29, sp, #OFFSET' directly, which will avoid
- // pointlessly restoring sp later.
-
- // Load the frame pointer.
- .if \offset == 0
- mov \fp, sp
- .else
- add \fp, sp, #\offset
- .endif
-
- // The CFA is now tracked through FP; compensate for any displacement.
- .cfi_def_cfa_register \fp
- .if \offset != 0
- .cfi_adjust_cfa_offset -\offset
- .endif
-
- // Remember that a frame pointer is live, and how to drop it.
- .L$_frameptr_p = -1
- .macro dropfp; _dropfp \fp, \offset; .endm
- .endm
-
- .macro _dropfp fp, offset=0
- // The body of the `dropfp' macro defined by `setfp': restore sp to
- // FP - OFFSET and hand CFA tracking back to sp.
-
- // Recover the stack pointer.
- .if \offset == 0
- mov sp, \fp
- .else
- sub sp, \fp, #\offset
- .endif
-
- // The CFA is tracked through sp again; compensate for any displacement.
- .cfi_def_cfa_register sp
- .if \offset != 0
- .cfi_adjust_cfa_offset +\offset
- .endif
-
- // No frame pointer is live any more.
- .L$_frameptr_p = 0
- .purgem dropfp
- .endm
-
- // stalloc N: allocate N bytes of stack, with the matching CFA adjustment.
- // N is written as an immediate operand.
- .macro stalloc n
- sub sp, sp, #\n
- .cfi_adjust_cfa_offset +\n
- .endm
-
- // stfree N: release N bytes of stack, with the matching CFA adjustment.
- .macro stfree n
- add sp, sp, #\n
- .cfi_adjust_cfa_offset -\n
- .endm
-
- .macro pushreg x, y=nil
- // Push register X, or the pair X, Y if Y is given. Either way the stack
- // moves by 16 bytes; X is saved at offset 0 and Y (if any) at offset 8.
-
- // Store with pre-decrement.
- .ifnes "\y", "nil"
- stp \x, \y, [sp, #-16]!
- .else
- str \x, [sp, #-16]!
- .endif
-
- // Describe the new frame layout.
- .cfi_adjust_cfa_offset +16
- .cfi_rel_offset \x, 0
- .ifnes "\y", "nil"
- .cfi_rel_offset \y, 8
- .endif
- .endm
-
- .macro popreg x, y=nil
- // Pop register X, or the pair X, Y if Y is given; the inverse of
- // `pushreg'. Either way the stack moves by 16 bytes.
-
- // Load with post-increment.
- .ifnes "\y", "nil"
- ldp \x, \y, [sp], #16
- .else
- ldr \x, [sp], #16
- .endif
-
- // The registers hold their original values again, and the frame shrinks.
- .cfi_restore \x
- .ifnes "\y", "nil"
- .cfi_restore \y
- .endif
- .cfi_adjust_cfa_offset -16
- .endm
-
- // savereg X, OFFSET or savereg X, Y, OFFSET: store register X, or the pair
- // X, Y, at sp + OFFSET without moving sp, and record the save slots. Note
- // that in the two-argument form the second macro argument (named `y') is
- // the offset, not a register. The pair form places Y eight bytes above X.
- //
- // NOTE(review): the single-register form writes the offset without a `#'
- // while the pair form writes `#OFFSET'; presumably callers pass a bare
- // number in both cases -- confirm.
- .macro savereg x, y, z=nil
- .ifeqs "\z", "nil"
- str \x, [sp, \y]
- .cfi_rel_offset \x, \y
- .else
- stp \x, \y, [sp, #\z]
- .cfi_rel_offset \x, \z
- .cfi_rel_offset \y, \z + 8
- .endif
- .endm
-
- // rstrreg X, OFFSET or rstrreg X, Y, OFFSET: reload register X, or the pair
- // X, Y, from sp + OFFSET without moving sp, and mark them as restored; the
- // inverse of `savereg'. As there, in the two-argument form the second macro
- // argument (named `y') is the offset.
- .macro rstrreg x, y, z=nil
- .ifeqs "\z", "nil"
- ldr \x, [sp, \y]
- .cfi_restore \x
- .else
- ldp \x, \y, [sp, #\z]
- .cfi_restore \x
- .cfi_restore \y
- .endif
- .endm
-
- // endprologue: accepted for source compatibility with the other
- // architectures, but assembles to nothing here (FUNC_POSTHOOK has already
- // set .L$_prologue_p).
- .macro endprologue
- .endm
-
- // cmov RD, RN, CC: set RD to RN if CC is satisfied, otherwise do nothing
- //
- // Implemented as a `csel' whose alternative is RD itself.
- .macro cmov rd, rn, cc
- csel \rd, \rn, \rd, \cc
- .endm
-
- // Notational improvement: write `csel.CC' etc., rather than `csel ..., CC'.
- //
- // _COND and _INST are lists, each applying its argument macro to every
- // condition code or instruction name in turn. For each instruction,
- // _INSTVARS defines a temporary assembler macro `_definstvar' which, given a
- // condition CC, defines the macro `INST.CC' to expand to `INST args, CC';
- // it then invokes `_definstvar' for every condition and purges it again.
- // The preprocessor helpers are undefined once the macros have been made.
- // Note that `cmov' in the list is the macro defined above rather than a
- // machine instruction.
- #define _COND(_) \
- _(eq) _(ne) _(cs) _(cc) _(vs) _(vc) _(mi) _(pl) \
- _(ge) _(lt) _(gt) _(le) _(hi) _(ls) _(al) _(nv) \
- _(hs) _(lo)
- #define _INST(_) \
- _(ccmp) _(ccmn) \
- _(csel) _(cmov) \
- _(csinc) _(cinc) _(cset) \
- _(csneg) _(cneg) \
- _(csinv) _(cinv) _(csetm)
- #define _CONDVAR(cc) _definstvar cc;
- #define _INSTVARS(inst) \
- .macro _definstvar cc; \
- .macro inst.\cc args:vararg; inst \args, \cc; .endm; \
- .endm; \
- _COND(_CONDVAR); \
- .purgem _definstvar;
- _INST(_INSTVARS)
- #undef _COND
- #undef _INST
- #undef _CONDVAR
- #undef _INSTVARS
-
- // Flag bits for `ccmp' and friends.
- //
- // One bit each for the N, Z, C and V flags; combine them by adding or
- // or-ing to form a four-bit flags value.
- #define CCMP_N 8
- #define CCMP_Z 4
- #define CCMP_C 2
- #define CCMP_V 1
-
- // Flag settings for satisfying conditions.
- //
- // Each CCMP_cc is a combination of the flag bits above (possibly none)
- // under which condition cc holds. These are particular choices: for some
- // conditions other flag combinations would satisfy them too.
- #define CCMP_MI CCMP_N
- #define CCMP_PL 0
- #define CCMP_EQ CCMP_Z
- #define CCMP_NE 0
- #define CCMP_CS CCMP_C
- #define CCMP_HS CCMP_C
- #define CCMP_CC 0
- #define CCMP_LO 0
- #define CCMP_VS CCMP_V
- #define CCMP_VC 0
- #define CCMP_HI CCMP_C
- #define CCMP_LS 0
- #define CCMP_LT CCMP_N
- #define CCMP_GE 0
- #define CCMP_LE CCMP_N
- #define CCMP_GT 0
-
- #endif
-
- ///--------------------------------------------------------------------------
- /// Final stuff.
-
- // Fallbacks for anything the platform sections above left undefined.
-
- // Function hooks: expand to nothing.
- #if !defined(FUNC_PREHOOK)
- # define FUNC_PREHOOK(_)
- #endif
- #if !defined(FUNC_POSTHOOK)
- # define FUNC_POSTHOOK(_)
- #endif
- #if !defined(ENDFUNC_HOOK)
- # define ENDFUNC_HOOK(_)
- #endif
-
- // External symbol names: add a leading underscore if SYM_USCORE is defined,
- // and otherwise use the name unchanged.
- #if !defined(F) && defined(SYM_USCORE)
- # define F(name) _##name
- #elif !defined(F)
- # define F(name) name
- #endif
-
- // Symbol type and size annotations: expand to nothing.
- #if !defined(TYPE_FUNC)
- # define TYPE_FUNC(name)
- #endif
-
- #if !defined(SIZE_OBJ)
- # define SIZE_OBJ(name)
- #endif
-
- // On ELF, unless WANT_EXECUTABLE_STACK is defined, create an empty
- // `.note.GNU-stack' section with no flags and return to the previous
- // section. (By convention this marks the object as not needing an
- // executable stack.)
- #if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
- .pushsection .note.GNU-stack, "", _SECTTY(progbits)
- .popsection
- #endif
-
- ///----- That's all, folks --------------------------------------------------
-
- #endif
|