Faster Keccak code (BMI instructions)

merge-mining
SChernykh 2024-05-30 23:11:14 +02:00
parent 33e1ebd3fe
commit bd6f68790e
9 changed files with 194 additions and 5 deletions

View File

@ -146,6 +146,13 @@ set(SOURCES
src/zmq_reader.cpp
)
if (AMD64)
set(SOURCES ${SOURCES} src/keccak_bmi.cpp)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
endif()
endif()
if (WITH_RANDOMX)
set(HEADERS ${HEADERS} src/miner.h)
set(SOURCES ${SOURCES} src/miner.cpp)

View File

@ -15,3 +15,7 @@ endif()
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
set(ARMv8 1)
endif()
if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$")
set(AMD64 1)
endif()

@ -1 +1 @@
Subproject commit 121f6dda58e6b3d0bc428f0dede1a11728acd8fe
Subproject commit 7ee603afebc063fa9c28a5350daee40be831d468

View File

@ -17,6 +17,9 @@
#include "common.h"
#include "keccak.h"
#ifdef WITH_RANDOMX
#include "RandomX/src/cpu.hpp"
#endif
namespace p2pool {
@ -24,7 +27,7 @@ namespace p2pool {
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif
static const uint64_t keccakf_rndc[24] =
const uint64_t keccakf_rndc[24] =
{
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
@ -36,7 +39,7 @@ static const uint64_t keccakf_rndc[24] =
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};
NOINLINE void keccakf(std::array<uint64_t, 25>& st)
NOINLINE void keccakf_plain(std::array<uint64_t, 25>& st)
{
for (int round = 0; round < KeccakParams::ROUNDS; ++round) {
uint64_t bc[5];
@ -115,6 +118,18 @@ NOINLINE void keccakf(std::array<uint64_t, 25>& st)
}
}
void (*keccakf)(std::array<uint64_t, 25>&) = keccakf_plain;
#if defined(WITH_RANDOMX) && (defined(__x86_64__) || defined(_M_AMD64))
static struct KeccakBMI_Check {
KeccakBMI_Check() {
if (randomx::Cpu().hasBmi()) {
keccakf = keccakf_bmi;
}
}
} keccak_bmi_check;
#endif
NOINLINE void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st)
{
constexpr int rsiz = KeccakParams::HASH_DATA_AREA;

View File

@ -24,7 +24,12 @@ enum KeccakParams {
ROUNDS = 24,
};
void keccakf(std::array<uint64_t, 25> &st);
extern const uint64_t keccakf_rndc[24];
extern void (*keccakf)(std::array<uint64_t, 25>& st);
void keccakf_plain(std::array<uint64_t, 25>& st);
void keccakf_bmi(std::array<uint64_t, 25>& st);
void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st);
void keccak_finish(const uint8_t* in, int inlen, std::array<uint64_t, 25>& st);

112
src/keccak_bmi.cpp Normal file
View File

@ -0,0 +1,112 @@
/*
* This file is part of the Monero P2Pool <https://github.com/SChernykh/p2pool>
* Copyright (c) 2021-2024 SChernykh <https://github.com/SChernykh>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "common.h"
#include "keccak.h"
#include <immintrin.h>
namespace p2pool {
#ifndef ROTL64
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif
#if defined(__GNUC__) && (__GNUC__ < 12) && !defined(_andn_u64)
#define _andn_u64 __andn_u64
#endif
NOINLINE void keccakf_bmi(std::array<uint64_t, 25>& st)
{
for (int round = 0; round < KeccakParams::ROUNDS; ++round) {
uint64_t bc[5];
// Theta
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
#define THETA(i) { \
const uint64_t t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \
st[i + 0 ] ^= t; \
st[i + 5] ^= t; \
st[i + 10] ^= t; \
st[i + 15] ^= t; \
st[i + 20] ^= t; \
}
THETA(0);
THETA(1);
THETA(2);
THETA(3);
THETA(4);
// Rho Pi
const uint64_t t = st[1];
st[1] = ROTL64(st[6], 44);
st[6] = ROTL64(st[9], 20);
st[9] = ROTL64(st[22], 61);
st[22] = ROTL64(st[14], 39);
st[14] = ROTL64(st[20], 18);
st[20] = ROTL64(st[2], 62);
st[2] = ROTL64(st[12], 43);
st[12] = ROTL64(st[13], 25);
st[13] = ROTL64(st[19], 8);
st[19] = ROTL64(st[23], 56);
st[23] = ROTL64(st[15], 41);
st[15] = ROTL64(st[4], 27);
st[4] = ROTL64(st[24], 14);
st[24] = ROTL64(st[21], 2);
st[21] = ROTL64(st[8], 55);
st[8] = ROTL64(st[16], 45);
st[16] = ROTL64(st[5], 36);
st[5] = ROTL64(st[3], 28);
st[3] = ROTL64(st[18], 21);
st[18] = ROTL64(st[17], 15);
st[17] = ROTL64(st[11], 10);
st[11] = ROTL64(st[7], 6);
st[7] = ROTL64(st[10], 3);
st[10] = ROTL64(t, 1);
// Chi
#define CHI(j) { \
const uint64_t st0 = st[j ]; \
const uint64_t st1 = st[j + 1]; \
const uint64_t st2 = st[j + 2]; \
const uint64_t st3 = st[j + 3]; \
const uint64_t st4 = st[j + 4]; \
st[j ] ^= _andn_u64(st1, st2); \
st[j + 1] ^= _andn_u64(st2, st3); \
st[j + 2] ^= _andn_u64(st3, st4); \
st[j + 3] ^= _andn_u64(st4, st0); \
st[j + 4] ^= _andn_u64(st0, st1); \
}
CHI(0);
CHI(5);
CHI(10);
CHI(15);
CHI(20);
// Iota
st[0] ^= keccakf_rndc[round];
}
}
} // namespace p2pool

View File

@ -74,6 +74,13 @@ set(SOURCES
../src/zmq_reader.cpp
)
if (AMD64)
set(SOURCES ${SOURCES} ../src/keccak_bmi.cpp)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(../src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
endif()
endif()
if (NOT STATIC_BINARY AND NOT STATIC_LIBS)
include(FindCURL)
endif()

View File

@ -15,3 +15,7 @@ endif()
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
set(ARMv8 1)
endif()
if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$")
set(AMD64 1)
endif()

View File

@ -17,11 +17,12 @@
#include "common.h"
#include "keccak.h"
#include "RandomX/src/cpu.hpp"
#include "gtest/gtest.h"
namespace p2pool {
TEST(keccak, hashing)
static void test_keccak()
{
auto check = [](const void* input, size_t size, const char* expected_output) {
hash output;
@ -53,6 +54,40 @@ TEST(keccak, hashing)
std::vector<uint8_t> v(1000000, 'a');
check(v.data(), v.size(), "fadae6b49f129bbb812be8407b7b2894f34aecf6dbd1f9b0f0c7e9853098fc96");
hash test;
for (int i = 0; i < 1000000; ++i) {
keccak(test.h, HASH_SIZE, test.h);
}
char buf[log::Stream::BUF_SIZE + 1];
log::Stream s(buf);
s << test;
ASSERT_EQ(memcmp(buf, "16e199635319b8c568a0405a570382994a90a56d5f116892d8cbcb3b13cda0eb", HASH_SIZE * 2), 0);
}
TEST(keccak, hashing)
{
auto t = keccakf;
keccakf = keccakf_plain;
test_keccak();
keccakf = t;
}
#if defined(__x86_64__) || defined(_M_AMD64)
TEST(keccak, hashing_bmi)
{
if (randomx::Cpu().hasBmi()) {
auto t = keccakf;
keccakf = keccakf_bmi;
test_keccak();
keccakf = t;
}
}
#endif
}