diff --git a/crypto/digest/md32_common.h b/crypto/digest/md32_common.h
index 947be4c9..9db8c54a 100644
--- a/crypto/digest/md32_common.h
+++ b/crypto/digest/md32_common.h
@@ -146,38 +146,38 @@ extern "C" {
 /* The first macro gives a ~30-40% performance improvement in SHA-256 compiled
  * with gcc on P4. This can only be done on x86, where unaligned data fetches
  * are possible. */
-#define HOST_c2l(c, l)                     \
-  (void)({                                 \
-    uint32_t r = *((const uint32_t *)(c)); \
-    asm("bswapl %0" : "=r"(r) : "0"(r));   \
-    (c) += 4;                              \
-    (l) = r;                               \
+#define HOST_c2l(c, l)                       \
+  (void)({                                   \
+    uint32_t r = *((const uint32_t *)(c));   \
+    __asm__("bswapl %0" : "=r"(r) : "0"(r)); \
+    (c) += 4;                                \
+    (l) = r;                                 \
   })
-#define HOST_l2c(l, c)                   \
-  (void)({                               \
-    uint32_t r = (l);                    \
-    asm("bswapl %0" : "=r"(r) : "0"(r)); \
-    *((uint32_t *)(c)) = r;              \
-    (c) += 4;                            \
-    r;                                   \
+#define HOST_l2c(l, c)                       \
+  (void)({                                   \
+    uint32_t r = (l);                        \
+    __asm__("bswapl %0" : "=r"(r) : "0"(r)); \
+    *((uint32_t *)(c)) = r;                  \
+    (c) += 4;                                \
+    r;                                       \
   })
 #elif defined(__aarch64__) && defined(__BYTE_ORDER__)
 #if defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define HOST_c2l(c, l)                                             \
-  (void)({                                                         \
-    uint32_t r;                                                    \
-    asm("rev %w0, %w1" : "=r"(r) : "r"(*((const uint32_t *)(c)))); \
-    (c) += 4;                                                      \
-    (l) = r;                                                       \
+#define HOST_c2l(c, l)                                                 \
+  (void)({                                                             \
+    uint32_t r;                                                        \
+    __asm__("rev %w0, %w1" : "=r"(r) : "r"(*((const uint32_t *)(c)))); \
+    (c) += 4;                                                          \
+    (l) = r;                                                           \
   })
-#define HOST_l2c(l, c)                                  \
-  (void)({                                              \
-    uint32_t r;                                         \
-    asm("rev %w0, %w1" : "=r"(r) : "r"((uint32_t)(l))); \
-    *((uint32_t *)(c)) = r;                             \
-    (c) += 4;                                           \
-    r;                                                  \
+#define HOST_l2c(l, c)                                      \
+  (void)({                                                  \
+    uint32_t r;                                             \
+    __asm__("rev %w0, %w1" : "=r"(r) : "r"((uint32_t)(l))); \
+    *((uint32_t *)(c)) = r;                                 \
+    (c) += 4;                                               \
+    r;                                                      \
   })
 #elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #define HOST_c2l(c, l) (void)((l) = *((const uint32_t *)(c)), (c) += 4)
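
The change itself only replaces the `asm` keyword with `__asm__` (the remaining line churn is backslash realignment from the longer keyword); the likely motivation is that plain `asm` is a GNU extension unavailable under strict `-std=` modes, while `__asm__` is always accepted. For context on what the macros do: HOST_c2l loads a 32-bit big-endian word from a byte cursor and advances it, and HOST_l2c stores one back the same way. Below is a minimal, portable sketch of those semantics as a standalone self-test; the names `ref_c2l`/`ref_l2c` are hypothetical and not part of this patch.

#include <assert.h>
#include <stdint.h>

/* Reference semantics of HOST_c2l: read a big-endian uint32_t from the
 * byte cursor *cp, advance the cursor by four bytes, return the word. */
static uint32_t ref_c2l(const uint8_t **cp) {
  const uint8_t *c = *cp;
  uint32_t l = ((uint32_t)c[0] << 24) | ((uint32_t)c[1] << 16) |
               ((uint32_t)c[2] << 8) | (uint32_t)c[3];
  *cp = c + 4;
  return l;
}

/* Reference semantics of HOST_l2c: write l as a big-endian uint32_t at
 * the byte cursor *cp and advance the cursor by four bytes. */
static void ref_l2c(uint32_t l, uint8_t **cp) {
  uint8_t *c = *cp;
  c[0] = (uint8_t)(l >> 24);
  c[1] = (uint8_t)(l >> 16);
  c[2] = (uint8_t)(l >> 8);
  c[3] = (uint8_t)l;
  *cp = c + 4;
}

int main(void) {
  const uint8_t in[4] = {0xde, 0xad, 0xbe, 0xef};
  const uint8_t *p = in;
  assert(ref_c2l(&p) == 0xdeadbeefu); /* big-endian load */
  assert(p == in + 4);                /* cursor advanced */

  uint8_t out[4];
  uint8_t *q = out;
  ref_l2c(0xdeadbeefu, &q); /* big-endian store */
  assert(out[0] == 0xde && out[1] == 0xad && out[2] == 0xbe &&
         out[3] == 0xef && q == out + 4);
  return 0;
}

In the header itself, the byte swap is done with `bswapl` (x86) or `rev` (aarch64) inline assembly on little-endian hosts, and with a plain load/store on big-endian hosts, as the surrounding #if ladder shows.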