diff --git a/external/src/randomx/CMakeLists.txt b/external/src/randomx/CMakeLists.txt deleted file mode 100644 index 9eef929..0000000 --- a/external/src/randomx/CMakeLists.txt +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) 2019, The Monero Project -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, are -# permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this list of -# conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, this list -# of conditions and the following disclaimer in the documentation and/or other -# materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors may be -# used to endorse or promote products derived from this software without specific -# prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -cmake_minimum_required(VERSION 2.8.12) - -project(RandomX) - -set(randomx_sources -src/aes_hash.cpp -src/aes_hash.hpp -src/allocator.cpp -src/allocator.hpp -src/argon2.h -src/argon2_avx2.c -src/argon2_core.c -src/argon2_core.h -src/argon2_ref.c -src/argon2_ssse3.c -src/assembly_generator_x86.cpp -src/assembly_generator_x86.hpp -src/blake2_generator.cpp -src/blake2_generator.hpp -src/bytecode_machine.cpp -src/bytecode_machine.hpp -src/common.hpp -src/configuration.h -src/cpu.cpp -src/cpu.hpp -src/dataset.cpp -src/dataset.hpp -src/instruction.cpp -src/instruction.hpp -src/instructions_portable.cpp -src/instruction_weights.hpp -src/intrin_portable.h -src/jit_compiler.hpp -src/jit_compiler_fallback.hpp -src/program.hpp -src/randomx.cpp -src/randomx.h -src/reciprocal.c -src/reciprocal.h -src/soft_aes.cpp -src/soft_aes.h -src/superscalar.cpp -src/superscalar.hpp -src/superscalar_program.hpp -src/virtual_machine.cpp -src/virtual_machine.hpp -src/virtual_memory.cpp -src/virtual_memory.hpp -src/vm_compiled.cpp -src/vm_compiled.hpp -src/vm_compiled_light.cpp -src/vm_compiled_light.hpp -src/vm_interpreted.cpp -src/vm_interpreted.hpp -src/vm_interpreted_light.cpp -src/vm_interpreted_light.hpp -src/blake2/blake2-impl.h -src/blake2/blake2.h -src/blake2/blake2b.c -src/blake2/blamka-round-avx2.h -src/blake2/blamka-round-ref.h -src/blake2/blamka-round-ssse3.h -src/blake2/endian.h -) -if(NOT ARCH_ID) - # allow cross compiling - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "") - set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}) - endif() - string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH_ID) -endif() - -if(NOT ARM_ID) - set(ARM_ID "${ARCH_ID}") -endif() - -if(NOT ARCH) - set(ARCH "default") -endif() - -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) - message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}") -endif() - -include(CheckCXXCompilerFlag) -include(CheckCCompilerFlag) - -function(add_flag flag) - string(REPLACE "-" "_" supported_cxx ${flag}_cxx) - 
check_cxx_compiler_flag(${flag} ${supported_cxx}) - if(${${supported_cxx}}) - message(STATUS "Setting CXX flag ${flag}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) - endif() - string(REPLACE "-" "_" supported_c ${flag}_c) - check_c_compiler_flag(${flag} ${supported_c}) - if(${${supported_c}}) - message(STATUS "Setting C flag ${flag}") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) - endif() -endfunction() - -# x86-64 -if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") - list(APPEND randomx_sources - src/jit_compiler_x86.cpp - src/jit_compiler_x86.hpp - src/jit_compiler_x86_static.hpp -) - - if(MSVC) - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /W0 /MTd") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W0 /MTd") - - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /W0 /MT") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /W0 /MT") - - set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /W0 /MT /DRELWITHDEBINFO") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /W0 /MT /DRELWITHDEBINFO") - - enable_language(ASM_MASM) - list(APPEND randomx_sources src/jit_compiler_x86_static.asm) - - set_property(SOURCE src/jit_compiler_x86_static.asm PROPERTY LANGUAGE ASM_MASM) - - set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS /arch:AVX2) - - add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/asm/configuration.asm - COMMAND powershell -ExecutionPolicy Bypass -File h2inc.ps1 ..\\src\\configuration.h > ..\\src\\asm\\configuration.asm SET ERRORLEVEL = 0 - COMMENT "Generating configuration.asm at ${CMAKE_CURRENT_SOURCE_DIR}" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vcxproj) - else() - list(APPEND randomx_sources src/jit_compiler_x86_static.S) - - # cheat because cmake and ccache hate each other - set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C) - set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY 
XCODE_EXPLICIT_FILE_TYPE sourcecode.asm) - - if(ARCH STREQUAL "native") - add_flag("-march=native") - else() - # default build has hardware AES enabled (software AES can be selected at runtime) - add_flag("-maes") - check_c_compiler_flag(-mssse3 HAVE_SSSE3) - if(HAVE_SSSE3) - set_source_files_properties(src/argon2_ssse3.c COMPILE_FLAGS -mssse3) - endif() - check_c_compiler_flag(-mavx2 HAVE_AVX2) - if(HAVE_AVX2) - set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS -mavx2) - endif() - endif() - endif() -endif() - -# PowerPC -if(ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le") - if(ARCH STREQUAL "native") - add_flag("-mcpu=native") - endif() - # PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build -endif() - -# ARMv8 -if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a") - list(APPEND randomx_sources - src/jit_compiler_a64_static.S - src/jit_compiler_a64.cpp - src/jit_compiler_a64.hpp - src/jit_compiler_a64_static.hpp - ) - # cheat because cmake and ccache hate each other - set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C) - set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm) - - # not sure if this check is needed - include(CheckIncludeFile) - check_include_file(asm/hwcap.h HAVE_HWCAP) - if(HAVE_HWCAP) - add_definitions(-DHAVE_HWCAP) - endif() - - if(ARCH STREQUAL "native") - add_flag("-march=native") - else() - # default build has hardware AES enabled (software AES can be selected at runtime) - add_flag("-march=armv8-a+crypto") - endif() -endif() - -set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") - -add_library(randomx ${randomx_sources}) - -set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON) -set_property(TARGET randomx PROPERTY CXX_STANDARD 14) -set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET randomx PROPERTY PUBLIC_HEADER 
src/randomx.h) - -include(GNUInstallDirs) -install(TARGETS randomx - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) diff --git a/external/src/randomx/LICENSE b/external/src/randomx/LICENSE deleted file mode 100644 index b1572ae..0000000 --- a/external/src/randomx/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2018-2019, tevador - -Copyright (c) 2014-2019, The Monero Project - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/external/src/randomx/README.md b/external/src/randomx/README.md deleted file mode 100644 index 4c1dabb..0000000 --- a/external/src/randomx/README.md +++ /dev/null @@ -1,158 +0,0 @@ -# RandomX -RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs. RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware. - -## Overview - -RandomX utilizes a virtual machine that executes programs in a special instruction set that consists of integer math, floating point math and branches. These programs can be translated into the CPU's native machine code on the fly (example: [program.asm](doc/program.asm)). At the end, the outputs of the executed programs are consolidated into a 256-bit result using a cryptographic hashing function ([Blake2b](https://blake2.net/)). - -RandomX can operate in two main modes with different memory requirements: - -* **Fast mode** - requires 2080 MiB of shared memory. -* **Light mode** - requires only 256 MiB of shared memory, but runs significantly slower - -Both modes are interchangeable as they give the same results. The fast mode is suitable for "mining", while the light mode is expected to be used only for proof verification. - -## Documentation - -Full specification is available in [specs.md](doc/specs.md). - -Design description and analysis is available in [design.md](doc/design.md). - -## Audits - -Between May and August 2019, RandomX was audited by 4 independent security research teams: - -* [Trail of Bits](https://www.trailofbits.com/) (28 000 USD) -* [X41 D-SEC](https://www.x41-dsec.de/) (42 000 EUR) -* [Kudelski Security](https://www.kudelskisecurity.com/) (18 250 CHF) -* [QuarksLab](https://quarkslab.com/en/) (52 800 USD) - -The first audit was generously funded by [Arweave](https://www.arweave.org/), one of the early adopters of RandomX. 
The remaining three audits were funded by donations from the [Monero community](https://ccs.getmonero.org/proposals/RandomX-audit.html). All four audits were coordinated by [OSTIF](https://ostif.org/). - -Final reports from all four audits are available in the [audits](audits/) directory. None of the audits found any critical vulnerabilities, but several changes in the algorithm and the code were made as a direct result of the audits. More details can be found in the [final report by OSTIF](https://ostif.org/four-audits-of-randomx-for-monero-and-arweave-have-been-completed-results/). - -## Build - -RandomX is written in C++11 and builds a static library with a C API provided by header file [randomx.h](src/randomx.h). Minimal API usage example is provided in [api-example1.c](src/tests/api-example1.c). The reference code includes a `randomx-benchmark` and `randomx-tests` executables for testing. - -### Linux - -Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended). - -To build optimized binaries for your machine, run: -``` -git clone https://github.com/tevador/RandomX.git -cd RandomX -mkdir build && cd build -cmake -DARCH=native .. -make -``` - -To build portable binaries, omit the `ARCH` option when executing cmake. - -### Windows - -On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided). - -### Precompiled binaries - -Precompiled `randomx-benchmark` binaries are available on the [Releases page](https://github.com/tevador/RandomX/releases). - -## Proof of work - -RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmonero.org/). The recommended usage is following: - -* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. 
For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`. -* The input `H` is the standard hashing blob with a selected nonce value. - -RandomX was successfully activated on the Monero network on the 30th November 2019. - -If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md). - -**Note**: To achieve ASIC resistance, the key `K` must change and must not be miner-selectable. We recommend to use blockchain data as the key in a similar way to the Monero example above. If blockchain data cannot be used for some reason, use a predefined sequence of keys. - -### CPU performance -The table below lists the performance of selected CPUs using the optimal number of threads (T) and large pages (if possible), in hashes per second (H/s). "CNv4" refers to the CryptoNight variant 4 (CN/R) hashrate measured using [XMRig](https://github.com/xmrig/xmrig) v2.14.1. "Fast mode" and "Light mode" are the two modes of RandomX. - -|CPU|RAM|OS|AES|CNv4|Fast mode|Light mode| -|---|---|--|---|-----|------|--------------| -Intel Core i9-9900K|32G DDR4-3200|Windows 10|hw|660 (8T)|5770 (8T)|1160 (16T)| -AMD Ryzen 7 1700|16G DDR4-2666|Ubuntu 16.04|hw|520 (8T)|4100 (8T)|620 (16T)| -Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)| -Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)| -Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)| - -Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower. - -### GPU performance - -SChernykh is developing GPU mining code for RandomX. 
Benchmarks are included in the following repositories: - -* [CUDA miner](https://github.com/SChernykh/RandomX_CUDA) - NVIDIA GPUs. -* [OpenCL miner](https://github.com/SChernykh/RandomX_OpenCL) - only for AMD Vega and AMD Polaris GPUs (uses GCN machine code). - -The code from the above repositories is included in the open source miner [XMRig](https://github.com/xmrig/xmrig). - -Note that GPUs are at a disadvantage when running RandomX since the algorithm was designed to be efficient on CPUs. - -# FAQ - -### Which CPU is best for mining RandomX? - -Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX. More specifically, efficient mining requires: - -* 64-bit architecture -* IEEE 754 compliant floating point unit -* Hardware AES support ([AES-NI](https://en.wikipedia.org/wiki/AES_instruction_set) extension for x86, Cryptography extensions for ARMv8) -* 16 KiB of L1 cache, 256 KiB of L2 cache and 2 MiB of L3 cache per mining thread -* Support for large memory pages -* At least 2.5 GiB of free RAM per NUMA node -* Multiple memory channels may be required: - * DDR3 memory is limited to about 1500-2000 H/s per channel (depending on frequency and timings) - * DDR4 memory is limited to about 4000-6000 H/s per channel (depending on frequency and timings) - -### Does RandomX facilitate botnets/malware mining or web mining? - -Due to the way the algorithm works, mining malware is much easier to detect. [RandomX Sniffer](https://github.com/tevador/randomx-sniffer) is a proof of concept tool that can detect illicit mining activity on Windows. - -Efficient mining requires more than 2 GiB of memory, which also disqualifies many low-end machines such as IoT devices, which are often parts of large botnets. - -Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly. 
- -### Since RandomX uses floating point math, does it give reproducible results on different platforms? - -RandomX uses only operations that are guaranteed to give correctly rounded results by the [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) standard: addition, subtraction, multiplication, division and square root. Special care is taken to avoid corner cases such as NaN values or denormals. - -The reference implementation has been validated on the following platforms: -* x86 (32-bit, little-endian) -* x86-64 (64-bit, little-endian) -* ARMv7+VFPv3 (32-bit, little-endian) -* ARMv8 (64-bit, little-endian) -* PPC64 (64-bit, big-endian) - -### Can FPGAs mine RandomX? - -RandomX generates multiple unique programs for every hash, so FPGAs cannot dynamically reconfigure their circuitry because typical FPGA takes tens of seconds to load a bitstream. It is also not possible to generate bitstreams for RandomX programs in advance due to the sheer number of combinations (there are 2512 unique programs). - -Sufficiently large FPGAs can mine RandomX in a [soft microprocessor](https://en.wikipedia.org/wiki/Soft_microprocessor) configuration by emulating a CPU. Under these circumstances, an FPGA will be much less efficient than a CPU or a specialized chip (ASIC). - -## Acknowledgements -* [tevador](https://github.com/tevador) - author -* [SChernykh](https://github.com/SChernykh) - contributed significantly to the design of RandomX -* [hyc](https://github.com/hyc) - original idea of using random code execution for PoW -* [Other contributors](https://github.com/tevador/RandomX/graphs/contributors) - -RandomX uses some source code from the following 3rd party repositories: -* Argon2d, Blake2b hashing functions: https://github.com/P-H-C/phc-winner-argon2 - -The author of RandomX declares no competing financial interest. - -## Donations - -If you'd like to use RandomX, please consider donating to help cover the development cost of the algorithm. 
- -Author's XMR address: -``` -845xHUh5GvfHwc2R8DVJCE7BT2sd4YEcmjG8GNSdmeNsP5DTEjXd1CNgxTcjHjiFuthRHAoVEJjM7GyKzQKLJtbd56xbh7V -``` -Total donations received: ~3.86 XMR (as of 30th August 2019). Thanks to all contributors. diff --git a/external/src/randomx/src/aes_hash.cpp b/external/src/randomx/src/aes_hash.cpp deleted file mode 100644 index a3b7395..0000000 --- a/external/src/randomx/src/aes_hash.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "soft_aes.h" -#include - -//NOTE: The functions below were tuned for maximum performance -//and are not cryptographically secure outside of the scope of RandomX. -//It's not recommended to use them as general hash functions and PRNGs. - -//AesHash1R: -//state0, state1, state2, state3 = Blake2b-512("RandomX AesHash1R state") -//xkey0, xkey1 = Blake2b-256("RandomX AesHash1R xkeys") - -#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d -#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e -#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017 -#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c - -#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389 -#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1 - -/* - Calculate a 512-bit hash of 'input' using 4 lanes of AES. - The input is treated as a set of round keys for the encryption - of the initial state. - - 'inputSize' must be a multiple of 64. - - For a 2 MiB input, this has the same security as 32768-round - AES encryption. 
- - Hashing throughput: >20 GiB/s per CPU core with hardware AES -*/ -template -void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { - assert(inputSize % 64 == 0); - const uint8_t* inptr = (uint8_t*)input; - const uint8_t* inputEnd = inptr + inputSize; - - rx_vec_i128 state0, state1, state2, state3; - rx_vec_i128 in0, in1, in2, in3; - - //intial state - state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); - state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); - state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); - state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); - - //process 64 bytes at a time in 4 lanes - while (inptr < inputEnd) { - in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0); - in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1); - in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2); - in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3); - - state0 = aesenc(state0, in0); - state1 = aesdec(state1, in1); - state2 = aesenc(state2, in2); - state3 = aesdec(state3, in3); - - inptr += 64; - } - - //two extra rounds to achieve full diffusion - rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); - rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); - - state0 = aesenc(state0, xkey0); - state1 = aesdec(state1, xkey0); - state2 = aesenc(state2, xkey0); - state3 = aesdec(state3, xkey0); - - state0 = aesenc(state0, xkey1); - state1 = aesdec(state1, xkey1); - state2 = aesenc(state2, xkey1); - state3 = aesdec(state3, xkey1); - - //output hash - rx_store_vec_i128((rx_vec_i128*)hash + 0, state0); - rx_store_vec_i128((rx_vec_i128*)hash + 1, state1); - rx_store_vec_i128((rx_vec_i128*)hash + 2, state2); - rx_store_vec_i128((rx_vec_i128*)hash + 3, state3); -} - -template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); -template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); - -//AesGenerator1R: -//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys") - -#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 
0x6daca553 -#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07 -#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1 -#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135 - -/* - Fill 'buffer' with pseudorandom data based on 512-bit 'state'. - The state is encrypted using a single AES round per 16 bytes of output - in 4 lanes. - - 'outputSize' must be a multiple of 64. - - The modified state is written back to 'state' to allow multiple - calls to this function. -*/ -template -void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { - assert(outputSize % 64 == 0); - const uint8_t* outptr = (uint8_t*)buffer; - const uint8_t* outputEnd = outptr + outputSize; - - rx_vec_i128 state0, state1, state2, state3; - rx_vec_i128 key0, key1, key2, key3; - - key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); - key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); - key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); - key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); - - state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); - state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); - state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); - state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); - - while (outptr < outputEnd) { - state0 = aesdec(state0, key0); - state1 = aesenc(state1, key1); - state2 = aesdec(state2, key2); - state3 = aesenc(state3, key3); - - rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); - rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); - rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); - rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); - - outptr += 64; - } - - rx_store_vec_i128((rx_vec_i128*)state + 0, state0); - rx_store_vec_i128((rx_vec_i128*)state + 1, state1); - rx_store_vec_i128((rx_vec_i128*)state + 2, state2); - rx_store_vec_i128((rx_vec_i128*)state + 3, state3); -} - -template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); -template void fillAes1Rx4(void *state, size_t outputSize, void 
*buffer); - -//AesGenerator4R: -//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3") -//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7") - -#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd -#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450 -#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904 -#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763 -#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73 -#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3 -#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7 -#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609 - -template -void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { - assert(outputSize % 64 == 0); - const uint8_t* outptr = (uint8_t*)buffer; - const uint8_t* outputEnd = outptr + outputSize; - - rx_vec_i128 state0, state1, state2, state3; - rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7; - - key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0); - key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1); - key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2); - key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3); - key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4); - key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5); - key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6); - key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7); - - state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); - state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); - state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); - state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); - - while (outptr < outputEnd) { - state0 = aesdec(state0, key0); - state1 = aesenc(state1, key0); - state2 = aesdec(state2, key4); - state3 = aesenc(state3, key4); - - state0 = aesdec(state0, key1); - state1 = aesenc(state1, key1); - state2 = aesdec(state2, key5); - state3 = aesenc(state3, key5); - - state0 = aesdec(state0, 
key2); - state1 = aesenc(state1, key2); - state2 = aesdec(state2, key6); - state3 = aesenc(state3, key6); - - state0 = aesdec(state0, key3); - state1 = aesenc(state1, key3); - state2 = aesdec(state2, key7); - state3 = aesenc(state3, key7); - - rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); - rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); - rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); - rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); - - outptr += 64; - } -} - -template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); -template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); - -template -void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { - uint8_t* scratchpadPtr = (uint8_t*)scratchpad; - const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize; - - // initial state - rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); - rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); - rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); - rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); - - const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); - const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); - const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); - const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); - - rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0); - rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1); - rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2); - rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3); - - constexpr int PREFETCH_DISTANCE = 4096; - const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE; - scratchpadEnd -= PREFETCH_DISTANCE; - - for (int i = 0; i < 2; ++i) { - //process 64 bytes at a time in 4 lanes - while (scratchpadPtr < scratchpadEnd) 
{ - hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); - hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1)); - hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2)); - hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3)); - - fill_state0 = aesdec(fill_state0, key0); - fill_state1 = aesenc(fill_state1, key1); - fill_state2 = aesdec(fill_state2, key2); - fill_state3 = aesenc(fill_state3, key3); - - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3); - - rx_prefetch_t0(prefetchPtr); - - scratchpadPtr += 64; - prefetchPtr += 64; - } - prefetchPtr = (const char*) scratchpad; - scratchpadEnd += PREFETCH_DISTANCE; - } - - rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0); - rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1); - rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2); - rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3); - - //two extra rounds to achieve full diffusion - rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); - rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); - - hash_state0 = aesenc(hash_state0, xkey0); - hash_state1 = aesdec(hash_state1, xkey0); - hash_state2 = aesenc(hash_state2, xkey0); - hash_state3 = aesdec(hash_state3, xkey0); - - hash_state0 = aesenc(hash_state0, xkey1); - hash_state1 = aesdec(hash_state1, xkey1); - hash_state2 = aesenc(hash_state2, xkey1); - hash_state3 = aesdec(hash_state3, xkey1); - - //output hash - rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0); - rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1); - rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2); - rx_store_vec_i128((rx_vec_i128*)hash + 3, 
hash_state3); -} - -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/external/src/randomx/src/aes_hash.hpp b/external/src/randomx/src/aes_hash.hpp deleted file mode 100644 index 9f75f73..0000000 --- a/external/src/randomx/src/aes_hash.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include - -template -void hashAes1Rx4(const void *input, size_t inputSize, void *hash); - -template -void fillAes1Rx4(void *state, size_t outputSize, void *buffer); - -template -void fillAes4Rx4(void *state, size_t outputSize, void *buffer); - -template -void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/external/src/randomx/src/allocator.cpp b/external/src/randomx/src/allocator.cpp deleted file mode 100644 index 4c6d86e..0000000 --- a/external/src/randomx/src/allocator.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include "allocator.hpp" -#include "intrin_portable.h" -#include "virtual_memory.hpp" -#include "common.hpp" - -namespace randomx { - - template - void* AlignedAllocator::allocMemory(size_t count) { - void *mem = rx_aligned_alloc(count, alignment); - if (mem == nullptr) - throw std::bad_alloc(); - return mem; - } - - template - void AlignedAllocator::freeMemory(void* ptr, size_t count) { - rx_aligned_free(ptr); - } - - template struct AlignedAllocator; - - void* LargePageAllocator::allocMemory(size_t count) { - return allocLargePagesMemory(count); - } - - void LargePageAllocator::freeMemory(void* ptr, size_t count) { - freePagedMemory(ptr, count); - }; - -} \ No newline at end of file diff --git a/external/src/randomx/src/allocator.hpp b/external/src/randomx/src/allocator.hpp deleted file mode 100644 index d7aa3f9..0000000 --- a/external/src/randomx/src/allocator.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include - -namespace randomx { - - template - struct AlignedAllocator { - static void* allocMemory(size_t); - static void freeMemory(void*, size_t); - }; - - struct LargePageAllocator { - static void* allocMemory(size_t); - static void freeMemory(void*, size_t); - }; - -} \ No newline at end of file diff --git a/external/src/randomx/src/argon2.h b/external/src/randomx/src/argon2.h deleted file mode 100644 index 9052f42..0000000 --- a/external/src/randomx/src/argon2.h +++ /dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#pragma once - -#include -#include -#include - -/* - * Argon2 input parameter restrictions - */ - - /* Minimum and maximum number of lanes (degree of parallelism) */ -#define ARGON2_MIN_LANES UINT32_C(1) -#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) - -/* Minimum and maximum number of threads */ -#define ARGON2_MIN_THREADS UINT32_C(1) -#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) - -/* Number of synchronization points between lanes per pass */ -#define ARGON2_SYNC_POINTS UINT32_C(4) - -/* Minimum and maximum digest size in bytes */ -#define ARGON2_MIN_OUTLEN UINT32_C(4) -#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ -#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ - -#define ARGON2_MIN(a, b) ((a) < (b) ? 
(a) : (b)) -/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ -#define ARGON2_MAX_MEMORY_BITS \ - ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) -#define ARGON2_MAX_MEMORY \ - ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) - -/* Minimum and maximum number of passes */ -#define ARGON2_MIN_TIME UINT32_C(1) -#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum password length in bytes */ -#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) -#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum associated data length in bytes */ -#define ARGON2_MIN_AD_LENGTH UINT32_C(0) -#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum salt length in bytes */ -#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) -#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) - -/* Minimum and maximum key length in bytes */ -#define ARGON2_MIN_SECRET UINT32_C(0) -#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) - -/* Flags to determine which fields are securely wiped (default = no wipe). 
*/ -#define ARGON2_DEFAULT_FLAGS UINT32_C(0) -#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) -#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) - - -/* Error codes */ -typedef enum Argon2_ErrorCodes { - ARGON2_OK = 0, - - ARGON2_OUTPUT_PTR_NULL = -1, - - ARGON2_OUTPUT_TOO_SHORT = -2, - ARGON2_OUTPUT_TOO_LONG = -3, - - ARGON2_PWD_TOO_SHORT = -4, - ARGON2_PWD_TOO_LONG = -5, - - ARGON2_SALT_TOO_SHORT = -6, - ARGON2_SALT_TOO_LONG = -7, - - ARGON2_AD_TOO_SHORT = -8, - ARGON2_AD_TOO_LONG = -9, - - ARGON2_SECRET_TOO_SHORT = -10, - ARGON2_SECRET_TOO_LONG = -11, - - ARGON2_TIME_TOO_SMALL = -12, - ARGON2_TIME_TOO_LARGE = -13, - - ARGON2_MEMORY_TOO_LITTLE = -14, - ARGON2_MEMORY_TOO_MUCH = -15, - - ARGON2_LANES_TOO_FEW = -16, - ARGON2_LANES_TOO_MANY = -17, - - ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ - ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ - ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ - ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ - - ARGON2_MEMORY_ALLOCATION_ERROR = -22, - - ARGON2_FREE_MEMORY_CBK_NULL = -23, - ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, - - ARGON2_INCORRECT_PARAMETER = -25, - ARGON2_INCORRECT_TYPE = -26, - - ARGON2_OUT_PTR_MISMATCH = -27, - - ARGON2_THREADS_TOO_FEW = -28, - ARGON2_THREADS_TOO_MANY = -29, - - ARGON2_MISSING_ARGS = -30, - - ARGON2_ENCODING_FAIL = -31, - - ARGON2_DECODING_FAIL = -32, - - ARGON2_THREAD_FAIL = -33, - - ARGON2_DECODING_LENGTH_FAIL = -34, - - ARGON2_VERIFY_MISMATCH = -35 -} argon2_error_codes; - -/* Memory allocator types --- for external allocation */ -typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); -typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); - -/* Argon2 external data structures */ - -/* - ***** - * Context: structure to hold Argon2 inputs: - * output array and its length, - * password and its length, - * salt and its length, - * secret and its length, - * associated 
data and its length, - * number of passes, amount of used memory (in KBytes, can be rounded up a bit) - * number of parallel threads that will be run. - * All the parameters above affect the output hash value. - * Additionally, two function pointers can be provided to allocate and - * deallocate the memory (if NULL, memory will be allocated internally). - * Also, three flags indicate whether to erase password, secret as soon as they - * are pre-hashed (and thus not needed anymore), and the entire memory - ***** - * Simplest situation: you have output array out[8], password is stored in - * pwd[32], salt is stored in salt[16], you do not have keys nor associated - * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with - * 4 parallel lanes. - * You want to erase the password, but you're OK with last pass not being - * erased. You want to use the default memory allocator. - * Then you initialize: - Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) - */ -typedef struct Argon2_Context { - uint8_t *out; /* output array */ - uint32_t outlen; /* digest length */ - - uint8_t *pwd; /* password array */ - uint32_t pwdlen; /* password length */ - - uint8_t *salt; /* salt array */ - uint32_t saltlen; /* salt length */ - - uint8_t *secret; /* key array */ - uint32_t secretlen; /* key length */ - - uint8_t *ad; /* associated data array */ - uint32_t adlen; /* associated data length */ - - uint32_t t_cost; /* number of passes */ - uint32_t m_cost; /* amount of memory requested (KB) */ - uint32_t lanes; /* number of lanes */ - uint32_t threads; /* maximum number of threads */ - - uint32_t version; /* version number */ - - allocate_fptr allocate_cbk; /* pointer to memory allocator */ - deallocate_fptr free_cbk; /* pointer to memory deallocator */ - - uint32_t flags; /* array of bool options */ -} argon2_context; - -/* Argon2 primitive type */ -typedef enum Argon2_type { - Argon2_d = 0, - Argon2_i = 1, - Argon2_id = 2 
-} argon2_type; - -/* Version of the algorithm */ -typedef enum Argon2_version { - ARGON2_VERSION_10 = 0x10, - ARGON2_VERSION_13 = 0x13, - ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 -} argon2_version; - -//Argon2 instance - forward declaration -typedef struct Argon2_instance_t argon2_instance_t; - -//Argon2 position = forward declaration -typedef struct Argon2_position_t argon2_position_t; - -//Argon2 implementation function -typedef void randomx_argon2_impl(const argon2_instance_t* instance, - argon2_position_t position); - -#if defined(__cplusplus) -extern "C" { -#endif - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param context current context - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance, - argon2_position_t position); - -randomx_argon2_impl *randomx_argon2_impl_ssse3(); -randomx_argon2_impl *randomx_argon2_impl_avx2(); - -#if defined(__cplusplus) -} -#endif diff --git a/external/src/randomx/src/argon2_avx2.c b/external/src/randomx/src/argon2_avx2.c deleted file mode 100644 index 2135303..0000000 --- a/external/src/randomx/src/argon2_avx2.c +++ /dev/null @@ -1,174 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#include -#include -#include - -#include "argon2.h" - -void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance, - argon2_position_t position); - -randomx_argon2_impl* randomx_argon2_impl_avx2() { -#if defined(__AVX2__) - return &randomx_argon2_fill_segment_avx2; -#endif - return NULL; -} - -#if defined(__AVX2__) - -#include "argon2_core.h" - -#include "blake2/blamka-round-avx2.h" -#include "blake2/blake2-impl.h" -#include "blake2/blake2.h" - -static void fill_block(__m256i* state, const block* ref_block, - block* next_block, int with_xor) { - __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - state[i] = _mm256_xor_si256( - state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i)); - 
block_XY[i] = _mm256_xor_si256( - state[i], _mm256_loadu_si256((const __m256i*)next_block->v + i)); - } - } - else { - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm256_xor_si256( - state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i)); - } - } - - for (i = 0; i < 4; ++i) { - BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - } - - for (i = 0; i < 4; ++i) { - BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - } - - for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { - state[i] = _mm256_xor_si256(state[i], block_XY[i]); - _mm256_storeu_si256((__m256i*)next_block->v + i, state[i]); - } -} - -void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance, - argon2_position_t position) { - block* ref_block = NULL, * curr_block = NULL; - block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - __m256i state[ARGON2_HWORDS_IN_BLOCK]; - - if (instance == NULL) { - return; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } - else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % 
instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - pseudo_rand = instance->memory[prev_offset].v[0]; - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - if (ARGON2_VERSION_10 == instance->version) { - /* version 1.2.1 and earlier: overwrite, not XOR */ - fill_block(state, ref_block, curr_block, 0); - } - else { - if (0 == position.pass) { - fill_block(state, ref_block, curr_block, 0); - } - else { - fill_block(state, ref_block, curr_block, 1); - } - } - } -} - -#endif diff --git a/external/src/randomx/src/argon2_core.c b/external/src/randomx/src/argon2_core.c deleted file mode 100644 index f2e7f3d..0000000 --- a/external/src/randomx/src/argon2_core.c +++ /dev/null @@ -1,411 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - - /*For memory wiping*/ -#ifdef _MSC_VER -#include -#include /* For SecureZeroMemory */ -#endif -#if defined __STDC_LIB_EXT1__ -#define __STDC_WANT_LIB_EXT1__ 1 -#endif -#define VC_GE_2005(version) (version >= 1400) - -#include -#include -#include - -#include "argon2_core.h" -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#ifdef GENKAT -#include "genkat.h" -#endif - -#if defined(__clang__) -#if __has_attribute(optnone) -#define NOT_OPTIMIZED __attribute__((optnone)) -#endif -#elif defined(__GNUC__) -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 40400 -#define NOT_OPTIMIZED __attribute__((optimize("O0"))) -#endif -#endif -#ifndef NOT_OPTIMIZED -#define NOT_OPTIMIZED -#endif - -/***************Instance and 
Position constructors**********/ - -static void load_block(block *dst, const void *input) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); - } -} - -static void store_block(void *output, const block *src) { - unsigned i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); - } -} - -uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane) { - /* - * Pass 0: - * This lane : all already finished segments plus already constructed - * blocks in this segment - * Other lanes : all already finished segments - * Pass 1+: - * This lane : (SYNC_POINTS - 1) last segments plus already constructed - * blocks in this segment - * Other lanes : (SYNC_POINTS - 1) last segments - */ - uint32_t reference_area_size; - uint64_t relative_position; - uint32_t start_position, absolute_position; - - if (0 == position->pass) { - /* First pass */ - if (0 == position->slice) { - /* First slice */ - reference_area_size = - position->index - 1; /* all but the previous */ - } - else { - if (same_lane) { - /* The same lane => add current segment */ - reference_area_size = - position->slice * instance->segment_length + - position->index - 1; - } - else { - reference_area_size = - position->slice * instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - } - else { - /* Second pass */ - if (same_lane) { - reference_area_size = instance->lane_length - - instance->segment_length + position->index - - 1; - } - else { - reference_area_size = instance->lane_length - - instance->segment_length + - ((position->index == 0) ? (-1) : 0); - } - } - - /* 1.2.4. Mapping pseudo_rand to 0.. 
and produce - * relative position */ - relative_position = pseudo_rand; - relative_position = relative_position * relative_position >> 32; - relative_position = reference_area_size - 1 - - (reference_area_size * relative_position >> 32); - - /* 1.2.5 Computing starting position */ - start_position = 0; - - if (0 != position->pass) { - start_position = (position->slice == ARGON2_SYNC_POINTS - 1) - ? 0 - : (position->slice + 1) * instance->segment_length; - } - - /* 1.2.6. Computing absolute position */ - absolute_position = (start_position + relative_position) % - instance->lane_length; /* absolute position */ - return absolute_position; -} - -/* Single-threaded version for p=1 case */ -static int fill_memory_blocks_st(argon2_instance_t *instance) { - uint32_t r, s, l; - - for (r = 0; r < instance->passes; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - for (l = 0; l < instance->lanes; ++l) { - argon2_position_t position = { r, l, (uint8_t)s, 0 }; - //fill the segment using the selected implementation - instance->impl(instance, position); - } - } - } - return ARGON2_OK; -} - -int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) { - if (instance == NULL || instance->lanes == 0) { - return ARGON2_INCORRECT_PARAMETER; - } - return fill_memory_blocks_st(instance); -} - -int randomx_argon2_validate_inputs(const argon2_context *context) { - if (NULL == context) { - return ARGON2_INCORRECT_PARAMETER; - } - - /* Validate password (required param) */ - if (NULL == context->pwd) { - if (0 != context->pwdlen) { - return ARGON2_PWD_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { - return ARGON2_PWD_TOO_SHORT; - } - - if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { - return ARGON2_PWD_TOO_LONG; - } - - /* Validate salt (required param) */ - if (NULL == context->salt) { - if (0 != context->saltlen) { - return ARGON2_SALT_PTR_MISMATCH; - } - } - - if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { - return ARGON2_SALT_TOO_SHORT; - } - - if 
(ARGON2_MAX_SALT_LENGTH < context->saltlen) { - return ARGON2_SALT_TOO_LONG; - } - - /* Validate secret (optional param) */ - if (NULL == context->secret) { - if (0 != context->secretlen) { - return ARGON2_SECRET_PTR_MISMATCH; - } - } - else { - if (ARGON2_MIN_SECRET > context->secretlen) { - return ARGON2_SECRET_TOO_SHORT; - } - if (ARGON2_MAX_SECRET < context->secretlen) { - return ARGON2_SECRET_TOO_LONG; - } - } - - /* Validate associated data (optional param) */ - if (NULL == context->ad) { - if (0 != context->adlen) { - return ARGON2_AD_PTR_MISMATCH; - } - } - else { - if (ARGON2_MIN_AD_LENGTH > context->adlen) { - return ARGON2_AD_TOO_SHORT; - } - if (ARGON2_MAX_AD_LENGTH < context->adlen) { - return ARGON2_AD_TOO_LONG; - } - } - - /* Validate memory cost */ - if (ARGON2_MIN_MEMORY > context->m_cost) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - if (ARGON2_MAX_MEMORY < context->m_cost) { - return ARGON2_MEMORY_TOO_MUCH; - } - - if (context->m_cost < 8 * context->lanes) { - return ARGON2_MEMORY_TOO_LITTLE; - } - - /* Validate time cost */ - if (ARGON2_MIN_TIME > context->t_cost) { - return ARGON2_TIME_TOO_SMALL; - } - - if (ARGON2_MAX_TIME < context->t_cost) { - return ARGON2_TIME_TOO_LARGE; - } - - /* Validate lanes */ - if (ARGON2_MIN_LANES > context->lanes) { - return ARGON2_LANES_TOO_FEW; - } - - if (ARGON2_MAX_LANES < context->lanes) { - return ARGON2_LANES_TOO_MANY; - } - - /* Validate threads */ - if (ARGON2_MIN_THREADS > context->threads) { - return ARGON2_THREADS_TOO_FEW; - } - - if (ARGON2_MAX_THREADS < context->threads) { - return ARGON2_THREADS_TOO_MANY; - } - - if (NULL != context->allocate_cbk && NULL == context->free_cbk) { - return ARGON2_FREE_MEMORY_CBK_NULL; - } - - if (NULL == context->allocate_cbk && NULL != context->free_cbk) { - return ARGON2_ALLOCATE_MEMORY_CBK_NULL; - } - - return ARGON2_OK; -} - -void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { - uint32_t l; - /* Make the first and second block in 
each lane as G(H0||0||i) or - G(H0||1||i) */ - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - for (l = 0; l < instance->lanes; ++l) { - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * instance->lane_length + 0], - blockhash_bytes); - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, - ARGON2_PREHASH_SEED_LENGTH); - load_block(&instance->memory[l * instance->lane_length + 1], - blockhash_bytes); - } -} - -void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) { - blake2b_state BlakeHash; - uint8_t value[sizeof(uint32_t)]; - - if (NULL == context || NULL == blockhash) { - return; - } - - blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); - - store32(&value, context->lanes); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->outlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->m_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->t_cost); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->version); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, (uint32_t)type); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, context->pwdlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->pwd != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, - context->pwdlen); - } - - store32(&value, context->saltlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->salt != NULL) { - blake2b_update(&BlakeHash, (const uint8_t 
*)context->salt, context->saltlen); - } - - store32(&value, context->secretlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->secret != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->secret, - context->secretlen); - } - - store32(&value, context->adlen); - blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - if (context->ad != NULL) { - blake2b_update(&BlakeHash, (const uint8_t *)context->ad, - context->adlen); - } - - blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); -} - -int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) { - uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; - int result = ARGON2_OK; - - if (instance == NULL || context == NULL) - return ARGON2_INCORRECT_PARAMETER; - instance->context_ptr = context; - - /* 1. Memory allocation */ - //RandomX takes care of memory allocation - - /* 2. Initial hashing */ - /* H_0 + 8 extra bytes to produce the first blocks */ - /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ - /* Hashing all inputs */ - rxa2_initial_hash(blockhash, context, instance->type); - /* Zeroing 8 extra bytes */ - /*rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, - ARGON2_PREHASH_SEED_LENGTH - - ARGON2_PREHASH_DIGEST_LENGTH);*/ - - /* 3. Creating first blocks, we always have at least two blocks in a slice - */ - rxa2_fill_first_blocks(blockhash, instance); - - return ARGON2_OK; -} diff --git a/external/src/randomx/src/argon2_core.h b/external/src/randomx/src/argon2_core.h deleted file mode 100644 index def27c6..0000000 --- a/external/src/randomx/src/argon2_core.h +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef ARGON2_CORE_H -#define ARGON2_CORE_H - -#include -#include "argon2.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#define CONST_CAST(x) (x)(uintptr_t) - - /**********************Argon2 internal constants*******************************/ - -enum argon2_core_constants { - /* Memory block size in bytes */ - ARGON2_BLOCK_SIZE = 1024, - ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, - ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32, - ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, - - /* Number of pseudo-random values generated by one call to Blake in Argon2i - to - generate reference block positions */ - ARGON2_ADDRESSES_IN_BLOCK = 128, - - /* Pre-hashing digest length and its extension*/ - ARGON2_PREHASH_DIGEST_LENGTH = 64, - ARGON2_PREHASH_SEED_LENGTH = 72 -}; - -/*************************Argon2 internal data types***********************/ - -/* - * Structure for the (1KB) memory block implemented as 128 64-bit words. - * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no - * bounds checking). - */ -typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; - -/* - * Argon2 instance: memory pointer, number of passes, amount of memory, type, - * and derived values. 
- * Used to evaluate the number and location of blocks to construct in each - * thread - */ -typedef struct Argon2_instance_t { - block *memory; /* Memory pointer */ - uint32_t version; - uint32_t passes; /* Number of passes */ - uint32_t memory_blocks; /* Number of blocks in memory */ - uint32_t segment_length; - uint32_t lane_length; - uint32_t lanes; - uint32_t threads; - argon2_type type; - int print_internals; /* whether to print the memory blocks */ - argon2_context *context_ptr; /* points back to original context */ - randomx_argon2_impl *impl; -} argon2_instance_t; - -/* - * Argon2 position: where we construct the block right now. Used to distribute - * work between threads. - */ -typedef struct Argon2_position_t { - uint32_t pass; - uint32_t lane; - uint8_t slice; - uint32_t index; -} argon2_position_t; - -/*Struct that holds the inputs for thread handling FillSegment*/ -typedef struct Argon2_thread_data { - argon2_instance_t *instance_ptr; - argon2_position_t pos; -} argon2_thread_data; - -/*************************Argon2 core functions********************************/ - -/* - * Computes absolute position of reference block in the lane following a skewed - * distribution and using a pseudo-random value as input - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rand 32-bit pseudo-random value used to determine the position - * @param same_lane Indicates if the block will be taken from the current lane. 
- * If so we can reference the current segment - * @pre All pointers must be valid - */ -uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane); - -/* - * Function that validates all inputs against predefined restrictions and return - * an error code - * @param context Pointer to current Argon2 context - * @return ARGON2_OK if everything is all right, otherwise one of error codes - * (all defined in - */ -int randomx_argon2_validate_inputs(const argon2_context *context); - -/* - * Function allocates memory, hashes the inputs with Blake, and creates first - * two blocks. Returns the pointer to the main memory with 2 blocks per lane - * initialized - * @param context Pointer to the Argon2 internal structure containing memory - * pointer, and parameters for time and space requirements. - * @param instance Current Argon2 instance - * @return Zero if successful, -1 if memory failed to allocate. @context->state - * will be modified if successful. - */ -int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context); - -/* - * Function that fills the entire memory t_cost times based on the first two - * blocks in each lane - * @param instance Pointer to the current instance - * @return ARGON2_OK if successful, @context->state - */ -int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/external/src/randomx/src/argon2_ref.c b/external/src/randomx/src/argon2_ref.c deleted file mode 100644 index dc4a804..0000000 --- a/external/src/randomx/src/argon2_ref.c +++ /dev/null @@ -1,187 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#include -#include -#include - -#include "argon2.h" -#include "argon2_core.h" - -#include "blake2/blamka-round-ref.h" -#include "blake2/blake2-impl.h" -#include "blake2/blake2.h" - -static void copy_block(block* dst, const block* src) { - memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); -} - -static void xor_block(block* dst, const block* src) { - int i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->v[i] ^= src->v[i]; - } -} - - /* - * Function fills a new memory block and optionally XORs the old block over the new one. - * @next_block must be initialized. - * @param prev_block Pointer to the previous block - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) - * @pre all block pointers must be valid - */ -static void fill_block(const block *prev_block, const block *ref_block, - block *next_block, int with_xor) { - block blockR, block_tmp; - unsigned i; - - copy_block(&blockR, ref_block); - xor_block(&blockR, prev_block); - copy_block(&block_tmp, &blockR); - /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */ - if (with_xor) { - /* Saving the next block contents for XOR over: */ - xor_block(&block_tmp, next_block); - /* Now blockR = ref_block + prev_block and - block_tmp = ref_block + prev_block + next_block */ - } - - /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then - (16,17,..31)... 
finally (112,113,...127) */ - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND_NOMSG( - blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], - blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], - blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], - blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], - blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], - blockR.v[16 * i + 15]); - } - - /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then - (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ - for (i = 0; i < 8; i++) { - BLAKE2_ROUND_NOMSG( - blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], - blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], - blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], - blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], - blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], - blockR.v[2 * i + 113]); - } - - copy_block(next_block, &block_tmp); - xor_block(next_block, &blockR); -} - -void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance, - argon2_position_t position) { - block *ref_block = NULL, *curr_block = NULL; - block address_block, input_block, zero_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index; - uint32_t i; - - if (instance == NULL) { - return; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } - else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - for (i = 
starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - pseudo_rand = instance->memory[prev_offset].v[0]; - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - if (ARGON2_VERSION_10 == instance->version) { - /* version 1.2.1 and earlier: overwrite, not XOR */ - fill_block(instance->memory + prev_offset, ref_block, curr_block, 0); - } - else { - if (0 == position.pass) { - fill_block(instance->memory + prev_offset, ref_block, - curr_block, 0); - } - else { - fill_block(instance->memory + prev_offset, ref_block, - curr_block, 1); - } - } - } -} diff --git a/external/src/randomx/src/argon2_ssse3.c b/external/src/randomx/src/argon2_ssse3.c deleted file mode 100644 index 778edd7..0000000 --- a/external/src/randomx/src/argon2_ssse3.c +++ /dev/null @@ -1,182 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#include -#include -#include - -#include "argon2.h" - -#if defined(_MSC_VER) //MSVC doesn't define SSSE3 -#define __SSSE3__ -#endif - -void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance, - argon2_position_t position); - -randomx_argon2_impl* randomx_argon2_impl_ssse3() { -#if defined(__SSSE3__) - return &randomx_argon2_fill_segment_ssse3; -#endif - return NULL; -} - -#if defined(__SSSE3__) - -#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ - -#include "argon2_core.h" - -#include "blake2/blamka-round-ssse3.h" -#include "blake2/blake2-impl.h" -#include "blake2/blake2.h" - -static void fill_block(__m128i* state, const block* ref_block, - block* next_block, int with_xor) { - __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; - unsigned int i; - - if (with_xor) { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i)); - block_XY[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)next_block->v + i)); - } - } - else { - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i)); - } - } - - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], - state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], - state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128(state[i], block_XY[i]); - _mm_storeu_si128((__m128i*)next_block->v + i, 
state[i]); - } -} - -void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance, - argon2_position_t position) { - block* ref_block = NULL, * curr_block = NULL; - block address_block, input_block; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index, i; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - if (instance == NULL) { - return; - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - } - - /* Offset of the current block */ - curr_offset = position.lane * instance->lane_length + - position.slice * instance->segment_length + starting_index; - - if (0 == curr_offset % instance->lane_length) { - /* Last block in this lane */ - prev_offset = curr_offset + instance->lane_length - 1; - } - else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - - for (i = starting_index; i < instance->segment_length; - ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % instance->lane_length == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - pseudo_rand = instance->memory[prev_offset].v[0]; - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % instance->lanes; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. 
- */ - position.index = i; - ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + instance->lane_length * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - if (ARGON2_VERSION_10 == instance->version) { - /* version 1.2.1 and earlier: overwrite, not XOR */ - fill_block(state, ref_block, curr_block, 0); - } - else { - if (0 == position.pass) { - fill_block(state, ref_block, curr_block, 0); - } - else { - fill_block(state, ref_block, curr_block, 1); - } - } - } -} - -#endif diff --git a/external/src/randomx/src/asm/configuration.asm b/external/src/randomx/src/asm/configuration.asm deleted file mode 100644 index 794d7ad..0000000 --- a/external/src/randomx/src/asm/configuration.asm +++ /dev/null @@ -1,48 +0,0 @@ -; File start: ..\src\configuration.h -RANDOMX_ARGON_MEMORY EQU 262144t -RANDOMX_ARGON_ITERATIONS EQU 3t -RANDOMX_ARGON_LANES EQU 1t -RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03"> -RANDOMX_CACHE_ACCESSES EQU 8t -RANDOMX_SUPERSCALAR_LATENCY EQU 170t -RANDOMX_DATASET_BASE_SIZE EQU 2147483648t -RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t -RANDOMX_PROGRAM_SIZE EQU 256t -RANDOMX_PROGRAM_ITERATIONS EQU 2048t -RANDOMX_PROGRAM_COUNT EQU 8t -RANDOMX_SCRATCHPAD_L3 EQU 2097152t -RANDOMX_SCRATCHPAD_L2 EQU 262144t -RANDOMX_SCRATCHPAD_L1 EQU 16384t -RANDOMX_JUMP_BITS EQU 8t -RANDOMX_JUMP_OFFSET EQU 8t -RANDOMX_FREQ_IADD_RS EQU 16t -RANDOMX_FREQ_IADD_M EQU 7t -RANDOMX_FREQ_ISUB_R EQU 16t -RANDOMX_FREQ_ISUB_M EQU 7t -RANDOMX_FREQ_IMUL_R EQU 16t -RANDOMX_FREQ_IMUL_M EQU 4t -RANDOMX_FREQ_IMULH_R EQU 4t -RANDOMX_FREQ_IMULH_M EQU 1t -RANDOMX_FREQ_ISMULH_R EQU 4t -RANDOMX_FREQ_ISMULH_M EQU 1t -RANDOMX_FREQ_IMUL_RCP EQU 8t -RANDOMX_FREQ_INEG_R EQU 2t -RANDOMX_FREQ_IXOR_R EQU 15t -RANDOMX_FREQ_IXOR_M EQU 5t -RANDOMX_FREQ_IROR_R EQU 8t -RANDOMX_FREQ_IROL_R EQU 2t -RANDOMX_FREQ_ISWAP_R EQU 4t -RANDOMX_FREQ_FSWAP_R EQU 4t -RANDOMX_FREQ_FADD_R 
EQU 16t -RANDOMX_FREQ_FADD_M EQU 5t -RANDOMX_FREQ_FSUB_R EQU 16t -RANDOMX_FREQ_FSUB_M EQU 5t -RANDOMX_FREQ_FSCAL_R EQU 6t -RANDOMX_FREQ_FMUL_R EQU 32t -RANDOMX_FREQ_FDIV_M EQU 4t -RANDOMX_FREQ_FSQRT_R EQU 6t -RANDOMX_FREQ_CBRANCH EQU 25t -RANDOMX_FREQ_CFROUND EQU 1t -RANDOMX_FREQ_ISTORE EQU 16t -RANDOMX_FREQ_NOP EQU 0t -; File end: ..\src\configuration.h diff --git a/external/src/randomx/src/asm/program_epilogue_linux.inc b/external/src/randomx/src/asm/program_epilogue_linux.inc deleted file mode 100644 index eaacae5..0000000 --- a/external/src/randomx/src/asm/program_epilogue_linux.inc +++ /dev/null @@ -1,10 +0,0 @@ - ;# restore callee-saved registers - System V AMD64 ABI - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - - ;# program finished - ret 0 \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_epilogue_store.inc b/external/src/randomx/src/asm/program_epilogue_store.inc deleted file mode 100644 index b94fa4d..0000000 --- a/external/src/randomx/src/asm/program_epilogue_store.inc +++ /dev/null @@ -1,19 +0,0 @@ - ;# save VM register values - pop rcx - mov qword ptr [rcx+0], r8 - mov qword ptr [rcx+8], r9 - mov qword ptr [rcx+16], r10 - mov qword ptr [rcx+24], r11 - mov qword ptr [rcx+32], r12 - mov qword ptr [rcx+40], r13 - mov qword ptr [rcx+48], r14 - mov qword ptr [rcx+56], r15 - movdqa xmmword ptr [rcx+64], xmm0 - movdqa xmmword ptr [rcx+80], xmm1 - movdqa xmmword ptr [rcx+96], xmm2 - movdqa xmmword ptr [rcx+112], xmm3 - lea rcx, [rcx+64] - movdqa xmmword ptr [rcx+64], xmm4 - movdqa xmmword ptr [rcx+80], xmm5 - movdqa xmmword ptr [rcx+96], xmm6 - movdqa xmmword ptr [rcx+112], xmm7 \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_epilogue_win64.inc b/external/src/randomx/src/asm/program_epilogue_win64.inc deleted file mode 100644 index 8d70a0a..0000000 --- a/external/src/randomx/src/asm/program_epilogue_win64.inc +++ /dev/null @@ -1,24 +0,0 @@ - ;# restore callee-saved registers - Microsoft 
x64 calling convention - movdqu xmm15, xmmword ptr [rsp] - movdqu xmm14, xmmword ptr [rsp+16] - movdqu xmm13, xmmword ptr [rsp+32] - movdqu xmm12, xmmword ptr [rsp+48] - movdqu xmm11, xmmword ptr [rsp+64] - add rsp, 80 - movdqu xmm10, xmmword ptr [rsp] - movdqu xmm9, xmmword ptr [rsp+16] - movdqu xmm8, xmmword ptr [rsp+32] - movdqu xmm7, xmmword ptr [rsp+48] - movdqu xmm6, xmmword ptr [rsp+64] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rsi - pop rdi - pop rbp - pop rbx - - ;# program finished - ret diff --git a/external/src/randomx/src/asm/program_loop_load.inc b/external/src/randomx/src/asm/program_loop_load.inc deleted file mode 100644 index c293323..0000000 --- a/external/src/randomx/src/asm/program_loop_load.inc +++ /dev/null @@ -1,28 +0,0 @@ - lea rcx, [rsi+rax] - push rcx - xor r8, qword ptr [rcx+0] - xor r9, qword ptr [rcx+8] - xor r10, qword ptr [rcx+16] - xor r11, qword ptr [rcx+24] - xor r12, qword ptr [rcx+32] - xor r13, qword ptr [rcx+40] - xor r14, qword ptr [rcx+48] - xor r15, qword ptr [rcx+56] - lea rcx, [rsi+rdx] - push rcx - cvtdq2pd xmm0, qword ptr [rcx+0] - cvtdq2pd xmm1, qword ptr [rcx+8] - cvtdq2pd xmm2, qword ptr [rcx+16] - cvtdq2pd xmm3, qword ptr [rcx+24] - cvtdq2pd xmm4, qword ptr [rcx+32] - cvtdq2pd xmm5, qword ptr [rcx+40] - cvtdq2pd xmm6, qword ptr [rcx+48] - cvtdq2pd xmm7, qword ptr [rcx+56] - andps xmm4, xmm13 - andps xmm5, xmm13 - andps xmm6, xmm13 - andps xmm7, xmm13 - orps xmm4, xmm14 - orps xmm5, xmm14 - orps xmm6, xmm14 - orps xmm7, xmm14 diff --git a/external/src/randomx/src/asm/program_loop_store.inc b/external/src/randomx/src/asm/program_loop_store.inc deleted file mode 100644 index 1ba1635..0000000 --- a/external/src/randomx/src/asm/program_loop_store.inc +++ /dev/null @@ -1,18 +0,0 @@ - pop rcx - mov qword ptr [rcx+0], r8 - mov qword ptr [rcx+8], r9 - mov qword ptr [rcx+16], r10 - mov qword ptr [rcx+24], r11 - mov qword ptr [rcx+32], r12 - mov qword ptr [rcx+40], r13 - mov qword ptr [rcx+48], r14 - mov qword 
ptr [rcx+56], r15 - pop rcx - xorpd xmm0, xmm4 - xorpd xmm1, xmm5 - xorpd xmm2, xmm6 - xorpd xmm3, xmm7 - movapd xmmword ptr [rcx+0], xmm0 - movapd xmmword ptr [rcx+16], xmm1 - movapd xmmword ptr [rcx+32], xmm2 - movapd xmmword ptr [rcx+48], xmm3 diff --git a/external/src/randomx/src/asm/program_prologue_linux.inc b/external/src/randomx/src/asm/program_prologue_linux.inc deleted file mode 100644 index 033584a..0000000 --- a/external/src/randomx/src/asm/program_prologue_linux.inc +++ /dev/null @@ -1,35 +0,0 @@ - ;# callee-saved registers - System V AMD64 ABI - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - ;# function arguments - mov rbx, rcx ;# loop counter - push rdi ;# RegisterFile& registerFile - mov rcx, rdi - mov rbp, qword ptr [rsi] ;# "mx", "ma" - mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset - mov rsi, rdx ;# uint8_t* scratchpad - - mov rax, rbp - ror rbp, 32 - - ;# zero integer registers - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - - ;# load constant registers - lea rcx, [rcx+120] - movapd xmm8, xmmword ptr [rcx+72] - movapd xmm9, xmmword ptr [rcx+88] - movapd xmm10, xmmword ptr [rcx+104] - movapd xmm11, xmmword ptr [rcx+120] diff --git a/external/src/randomx/src/asm/program_prologue_win64.inc b/external/src/randomx/src/asm/program_prologue_win64.inc deleted file mode 100644 index 10f21d3..0000000 --- a/external/src/randomx/src/asm/program_prologue_win64.inc +++ /dev/null @@ -1,48 +0,0 @@ - ;# callee-saved registers - Microsoft x64 calling convention - push rbx - push rbp - push rdi - push rsi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - movdqu xmmword ptr [rsp+64], xmm6 - movdqu xmmword ptr [rsp+48], xmm7 - movdqu xmmword ptr [rsp+32], xmm8 - movdqu xmmword ptr [rsp+16], xmm9 - movdqu xmmword ptr [rsp+0], xmm10 - sub rsp, 80 - movdqu xmmword ptr [rsp+64], xmm11 - movdqu xmmword ptr [rsp+48], xmm12 - movdqu xmmword ptr [rsp+32], xmm13 - movdqu 
xmmword ptr [rsp+16], xmm14 - movdqu xmmword ptr [rsp+0], xmm15 - - ;# function arguments - push rcx ;# RegisterFile& registerFile - mov rbp, qword ptr [rdx] ;# "mx", "ma" - mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset - mov rsi, r8 ;# uint8_t* scratchpad - mov rbx, r9 ;# loop counter - - mov rax, rbp - ror rbp, 32 - - ;# zero integer registers - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - - ;# load constant registers - lea rcx, [rcx+120] - movapd xmm8, xmmword ptr [rcx+72] - movapd xmm9, xmmword ptr [rcx+88] - movapd xmm10, xmmword ptr [rcx+104] - movapd xmm11, xmmword ptr [rcx+120] diff --git a/external/src/randomx/src/asm/program_read_dataset.inc b/external/src/randomx/src/asm/program_read_dataset.inc deleted file mode 100644 index 9c61092..0000000 --- a/external/src/randomx/src/asm/program_read_dataset.inc +++ /dev/null @@ -1,16 +0,0 @@ - mov ecx, ebp ;# ecx = ma - and ecx, RANDOMX_DATASET_BASE_MASK - xor r8, qword ptr [rdi+rcx] - ror rbp, 32 ;# swap "ma" and "mx" - xor rbp, rax ;# modify "mx" - mov edx, ebp ;# edx = mx - and edx, RANDOMX_DATASET_BASE_MASK - prefetchnta byte ptr [rdi+rdx] - xor r9, qword ptr [rdi+rcx+8] - xor r10, qword ptr [rdi+rcx+16] - xor r11, qword ptr [rdi+rcx+24] - xor r12, qword ptr [rdi+rcx+32] - xor r13, qword ptr [rdi+rcx+40] - xor r14, qword ptr [rdi+rcx+48] - xor r15, qword ptr [rdi+rcx+56] - \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_read_dataset_sshash_fin.inc b/external/src/randomx/src/asm/program_read_dataset_sshash_fin.inc deleted file mode 100644 index f5a067d..0000000 --- a/external/src/randomx/src/asm/program_read_dataset_sshash_fin.inc +++ /dev/null @@ -1,10 +0,0 @@ - mov rbx, qword ptr [rsp+64] - xor r8, qword ptr [rsp+56] - xor r9, qword ptr [rsp+48] - xor r10, qword ptr [rsp+40] - xor r11, qword ptr [rsp+32] - xor r12, qword ptr [rsp+24] - xor r13, qword ptr [rsp+16] - xor r14, qword ptr [rsp+8] - xor r15, qword 
ptr [rsp+0] - add rsp, 72 \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_read_dataset_sshash_init.inc b/external/src/randomx/src/asm/program_read_dataset_sshash_init.inc deleted file mode 100644 index 9491f3d..0000000 --- a/external/src/randomx/src/asm/program_read_dataset_sshash_init.inc +++ /dev/null @@ -1,17 +0,0 @@ - sub rsp, 72 - mov qword ptr [rsp+64], rbx - mov qword ptr [rsp+56], r8 - mov qword ptr [rsp+48], r9 - mov qword ptr [rsp+40], r10 - mov qword ptr [rsp+32], r11 - mov qword ptr [rsp+24], r12 - mov qword ptr [rsp+16], r13 - mov qword ptr [rsp+8], r14 - mov qword ptr [rsp+0], r15 - ror rbp, 32 ;# swap "ma" and "mx" - xor rbp, rax ;# modify "mx" - mov rbx, rbp ;# ebx = ma - shr rbx, 38 - and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number - ;# add ebx, datasetOffset / 64 - ;# call 32768 \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_sshash_constants.inc b/external/src/randomx/src/asm/program_sshash_constants.inc deleted file mode 100644 index 53dc175..0000000 --- a/external/src/randomx/src/asm/program_sshash_constants.inc +++ /dev/null @@ -1,24 +0,0 @@ -r0_mul: - ;#/ 6364136223846793005 - db 45, 127, 149, 76, 45, 244, 81, 88 -r1_add: - ;#/ 9298411001130361340 - db 252, 161, 245, 89, 138, 151, 10, 129 -r2_add: - ;#/ 12065312585734608966 - db 70, 216, 194, 56, 223, 153, 112, 167 -r3_add: - ;#/ 9306329213124626780 - db 92, 73, 34, 191, 28, 185, 38, 129 -r4_add: - ;#/ 5281919268842080866 - db 98, 138, 159, 23, 151, 37, 77, 73 -r5_add: - ;#/ 10536153434571861004 - db 12, 236, 170, 206, 185, 239, 55, 146 -r6_add: - ;#/ 3398623926847679864 - db 120, 45, 230, 108, 116, 86, 42, 47 -r7_add: - ;#/ 9549104520008361294 - db 78, 229, 44, 182, 247, 59, 133, 132 \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_sshash_load.inc b/external/src/randomx/src/asm/program_sshash_load.inc deleted file mode 100644 index 5351356..0000000 --- 
a/external/src/randomx/src/asm/program_sshash_load.inc +++ /dev/null @@ -1,8 +0,0 @@ - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_sshash_prefetch.inc b/external/src/randomx/src/asm/program_sshash_prefetch.inc deleted file mode 100644 index 26efb51..0000000 --- a/external/src/randomx/src/asm/program_sshash_prefetch.inc +++ /dev/null @@ -1,4 +0,0 @@ - and rbx, RANDOMX_CACHE_MASK - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] \ No newline at end of file diff --git a/external/src/randomx/src/asm/program_xmm_constants.inc b/external/src/randomx/src/asm/program_xmm_constants.inc deleted file mode 100644 index 296237a..0000000 --- a/external/src/randomx/src/asm/program_xmm_constants.inc +++ /dev/null @@ -1,6 +0,0 @@ -mantissaMask: - db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0 -exp240: - db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -scaleMask: - db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128 \ No newline at end of file diff --git a/external/src/randomx/src/asm/randomx_reciprocal.inc b/external/src/randomx/src/asm/randomx_reciprocal.inc deleted file mode 100644 index e1f22fd..0000000 --- a/external/src/randomx/src/asm/randomx_reciprocal.inc +++ /dev/null @@ -1,7 +0,0 @@ - mov edx, 1 - mov r8, rcx - xor eax, eax - bsr rcx, rcx - shl rdx, cl - div r8 - ret \ No newline at end of file diff --git a/external/src/randomx/src/assembly_generator_x86.cpp b/external/src/randomx/src/assembly_generator_x86.cpp deleted file mode 100644 index e7e5258..0000000 --- a/external/src/randomx/src/assembly_generator_x86.cpp +++ /dev/null @@ -1,611 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include -#include "assembly_generator_x86.hpp" -#include "common.hpp" -#include "reciprocal.h" -#include "program.hpp" -#include "superscalar.hpp" - -namespace randomx { - - static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; - static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; - static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; - static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; - static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; - static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; - - static const char* tempRegx = "xmm12"; - static const char* mantissaMaskReg = "xmm13"; - static const char* exponentMaskReg = "xmm14"; - static const char* scaleMaskReg = "xmm15"; - static const char* regIc = "rbx"; - static const char* regIc32 = "ebx"; - static const char* regIc8 = "bl"; - static const char* regScratchpadAddr = "rsi"; - - void AssemblyGeneratorX86::generateProgram(Program& prog) { - for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i] = -1; - } - asmCode.str(std::string()); //clear - for (unsigned i = 0; i < prog.getSize(); ++i) { - asmCode << "randomx_isn_" << i << ":" << std::endl; - Instruction& instr = prog(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; - generateCode(instr, i); - } - } - - void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { - asmCode.str(std::string()); //clear -#ifdef RANDOMX_ALIGN - asmCode << "ALIGN 16" << std::endl; -#endif - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - switch ((SuperscalarInstructionType)instr.opcode) - { - case SuperscalarInstructionType::ISUB_R: - asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - break; - case SuperscalarInstructionType::IXOR_R: - asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - break; 
- case SuperscalarInstructionType::IADD_RS: - asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; - break; - case SuperscalarInstructionType::IMUL_R: - asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - break; - case SuperscalarInstructionType::IROR_C: - asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl; - break; - case SuperscalarInstructionType::IADD_C7: - asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; - break; - case SuperscalarInstructionType::IXOR_C7: - asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; - break; - case SuperscalarInstructionType::IADD_C8: - asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; -#ifdef RANDOMX_ALIGN - asmCode << "nop" << std::endl; -#endif - break; - case SuperscalarInstructionType::IXOR_C8: - asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; -#ifdef RANDOMX_ALIGN - asmCode << "nop" << std::endl; -#endif - break; - case SuperscalarInstructionType::IADD_C9: - asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; -#ifdef RANDOMX_ALIGN - asmCode << "xchg ax, ax ;nop" << std::endl; -#endif - break; - case SuperscalarInstructionType::IXOR_C9: - asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; -#ifdef RANDOMX_ALIGN - asmCode << "xchg ax, ax ;nop" << std::endl; -#endif - break; - case SuperscalarInstructionType::IMULH_R: - asmCode << "mov rax, " << regR[instr.dst] << std::endl; - asmCode << "mul " << regR[instr.src] << std::endl; - asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; - break; - case SuperscalarInstructionType::ISMULH_R: - asmCode << "mov rax, " << regR[instr.dst] << std::endl; - asmCode << "imul " << regR[instr.src] << std::endl; - 
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; - break; - case SuperscalarInstructionType::IMUL_RCP: - asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl; - asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl; - break; - default: - UNREACHABLE; - } - } - } - - void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) { - asmCode.str(std::string()); //clear - asmCode << "#include " << std::endl; - asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; - asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; - asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl; - asmCode << " }" << std::endl; - asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; - asmCode << " return ((__int128)a * b) >> 64;" << std::endl; - asmCode << " }" << std::endl; - asmCode << " #define HAVE_MULH" << std::endl; - asmCode << " #define HAVE_SMULH" << std::endl; - asmCode << "#endif" << std::endl; - asmCode << "#if defined(_MSC_VER)" << std::endl; - asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl; - asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl; - asmCode << " #include " << std::endl; - asmCode << " #include " << std::endl; - asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl; - asmCode << " return _rotr64(x, c);" << std::endl; - asmCode << " }" << std::endl; - asmCode << " #define HAVE_ROTR" << std::endl; - asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl; - asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; - asmCode << " return __umulh(a, b);" << std::endl; - asmCode << " }" << std::endl; - asmCode << " #define HAVE_MULH" << std::endl; - asmCode << " #endif" << std::endl; - asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl; - asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; - asmCode << " int64_t hi;" << std::endl; 
- asmCode << " _mul128(a, b, &hi);" << std::endl; - asmCode << " return hi;" << std::endl; - asmCode << " }" << std::endl; - asmCode << " #define HAVE_SMULH" << std::endl; - asmCode << " #endif" << std::endl; - asmCode << "#endif" << std::endl; - asmCode << "#ifndef HAVE_ROTR" << std::endl; - asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl; - asmCode << " return (a >> b) | (a << (64 - b));" << std::endl; - asmCode << " }" << std::endl; - asmCode << " #define HAVE_ROTR" << std::endl; - asmCode << "#endif" << std::endl; - asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl; - asmCode << " #error \"Required functions are not defined\"" << std::endl; - asmCode << "#endif" << std::endl; - asmCode << "void superScalar(uint64_t r[8]) {" << std::endl; - asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - switch ((SuperscalarInstructionType)instr.opcode) - { - case SuperscalarInstructionType::ISUB_R: - asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; - break; - case SuperscalarInstructionType::IXOR_R: - asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; - break; - case SuperscalarInstructionType::IADD_RS: - asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl; - break; - case SuperscalarInstructionType::IMUL_R: - asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; - break; - case SuperscalarInstructionType::IROR_C: - asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl; - break; - case SuperscalarInstructionType::IADD_C7: - case SuperscalarInstructionType::IADD_C8: - case SuperscalarInstructionType::IADD_C9: - asmCode << regR[instr.dst] << " += " << 
(int32_t)instr.getImm32() << ";" << std::endl; - break; - case SuperscalarInstructionType::IXOR_C7: - case SuperscalarInstructionType::IXOR_C8: - case SuperscalarInstructionType::IXOR_C9: - asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl; - break; - case SuperscalarInstructionType::IMULH_R: - asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; - break; - case SuperscalarInstructionType::ISMULH_R: - asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; - break; - case SuperscalarInstructionType::IMUL_RCP: - asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl; - break; - default: - UNREACHABLE; - } - } - asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl; - asmCode << "}" << std::endl; - } - - void AssemblyGeneratorX86::traceint(Instruction& instr) { - if (trace) { - asmCode << "\tpush " << regR[instr.dst] << std::endl; - } - } - - void AssemblyGeneratorX86::traceflt(Instruction& instr) { - if (trace) { - asmCode << "\tpush 0" << std::endl; - } - } - - void AssemblyGeneratorX86::tracenop(Instruction& instr) { - if (trace) { - asmCode << "\tpush 0" << std::endl; - } - } - - void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { - asmCode << "\t; " << instr; - auto generator = engine[instr.opcode]; - (this->*generator)(instr, i); - } - - void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { - asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? 
ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; - } - - void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { - asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - int mask; - if (instr.getModCond() < StoreL3Condition) { - mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask; - } - else { - mask = ScratchpadL3Mask; - } - asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl; - } - - int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { - return (int32_t)instr.getImm32() & ScratchpadL3Mask; - } - - void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if(instr.dst == RegisterNeedsDisplacement) - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - else - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - } - else { - asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - } - else { - asmCode << "\tsub " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; - } - traceint(instr); - } - - void 
AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - } - else { - asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - } - else { - asmCode << "\timul " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - } - else { - asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\tmul " << regR[instr.src] << std::endl; - asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr, "ecx"); - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; - } - else { - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << 
std::endl; - } - asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\timul " << regR[instr.src] << std::endl; - asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr, "ecx"); - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; - } - else { - asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; - } - asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - asmCode << "\tneg " << regR[instr.dst] << std::endl; - traceint(instr); - } - - void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - } - else { - asmCode << "\txor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - } - else { - asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; - } - traceint(instr); - } - - void 
AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; - asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl; - } - else { - asmCode << "\tror " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; - asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl; - } - else { - asmCode << "\trol " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl; - } - traceint(instr); - } - - void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { - uint64_t divisor = instr.getImm32(); - if (!isZeroOrPowerOf2(divisor)) { - registerUsage[instr.dst] = i; - asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; - asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; - traceint(instr); - } - else { - tracenop(instr); - } - } - - void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { - if (instr.src != instr.dst) { - registerUsage[instr.dst] = i; - registerUsage[instr.src] = i; - asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; - traceint(instr); - } - else { - tracenop(instr); - } - } - - void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) { - asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - 
genAddressReg(instr); - asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - genAddressReg(instr); - asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - genAddressReg(instr); - asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl; - asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl; - asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl; - traceflt(instr); - } - - void AssemblyGeneratorX86::h_CFROUND(Instruction& 
instr, int i) { - asmCode << "\tmov rax, " << regR[instr.src] << std::endl; - int rotate = (13 - (instr.getImm32() & 63)) & 63; - if (rotate != 0) - asmCode << "\trol rax, " << rotate << std::endl; - asmCode << "\tand eax, 24576" << std::endl; - asmCode << "\tor eax, 40896" << std::endl; - asmCode << "\tpush rax" << std::endl; - asmCode << "\tldmxcsr dword ptr [rsp]" << std::endl; - asmCode << "\tpop rax" << std::endl; - tracenop(instr); - } - - void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) { - int reg = instr.dst; - int target = registerUsage[reg] + 1; - int shift = instr.getModCond() + ConditionOffset; - int32_t imm = instr.getImm32() | (1L << shift); - if (ConditionOffset > 0 || shift > 0) - imm &= ~(1L << (shift - 1)); - asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl; - asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; - asmCode << "\tjz randomx_isn_" << target << std::endl; - //mark all registers as used - for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j] = i; - } - } - - void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { - genAddressRegDst(instr); - asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl; - tracenop(instr); - } - - void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) { - asmCode << "\tnop" << std::endl; - tracenop(instr); - } - -#include "instruction_weights.hpp" -#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) - - InstructionGenerator AssemblyGeneratorX86::engine[256] = { - INST_HANDLE(IADD_RS) - INST_HANDLE(IADD_M) - INST_HANDLE(ISUB_R) - INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_R) - INST_HANDLE(IMUL_M) - INST_HANDLE(IMULH_R) - INST_HANDLE(IMULH_M) - INST_HANDLE(ISMULH_R) - INST_HANDLE(ISMULH_M) - INST_HANDLE(IMUL_RCP) - INST_HANDLE(INEG_R) - INST_HANDLE(IXOR_R) - INST_HANDLE(IXOR_M) - INST_HANDLE(IROR_R) - INST_HANDLE(IROL_R) - INST_HANDLE(ISWAP_R) - INST_HANDLE(FSWAP_R) - 
INST_HANDLE(FADD_R) - INST_HANDLE(FADD_M) - INST_HANDLE(FSUB_R) - INST_HANDLE(FSUB_M) - INST_HANDLE(FSCAL_R) - INST_HANDLE(FMUL_R) - INST_HANDLE(FDIV_M) - INST_HANDLE(FSQRT_R) - INST_HANDLE(CBRANCH) - INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(NOP) - }; -} \ No newline at end of file diff --git a/external/src/randomx/src/assembly_generator_x86.hpp b/external/src/randomx/src/assembly_generator_x86.hpp deleted file mode 100644 index e962398..0000000 --- a/external/src/randomx/src/assembly_generator_x86.hpp +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include "common.hpp" -#include - -namespace randomx { - - class Program; - class SuperscalarProgram; - class AssemblyGeneratorX86; - class Instruction; - - typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); - - class AssemblyGeneratorX86 { - public: - void generateProgram(Program& prog); - void generateAsm(SuperscalarProgram& prog); - void generateC(SuperscalarProgram& prog); - void printCode(std::ostream& os) { - os << asmCode.rdbuf(); - } - private: - void genAddressReg(Instruction&, const char*); - void genAddressRegDst(Instruction&, int); - int32_t genAddressImm(Instruction&); - void generateCode(Instruction&, int); - void traceint(Instruction&); - void traceflt(Instruction&); - void tracenop(Instruction&); - void h_IADD_RS(Instruction&, int); - void h_IADD_M(Instruction&, int); - void h_ISUB_R(Instruction&, int); - void h_ISUB_M(Instruction&, int); - void h_IMUL_R(Instruction&, int); - void h_IMUL_M(Instruction&, int); - void h_IMULH_R(Instruction&, int); - void h_IMULH_M(Instruction&, int); - void h_ISMULH_R(Instruction&, int); - void h_ISMULH_M(Instruction&, int); - void h_IMUL_RCP(Instruction&, int); - void h_INEG_R(Instruction&, int); - void h_IXOR_R(Instruction&, int); - void h_IXOR_M(Instruction&, int); - void h_IROR_R(Instruction&, int); - void h_IROL_R(Instruction&, int); - void h_ISWAP_R(Instruction&, int); - void h_FSWAP_R(Instruction&, int); - void h_FADD_R(Instruction&, int); - void 
h_FADD_M(Instruction&, int); - void h_FSUB_R(Instruction&, int); - void h_FSUB_M(Instruction&, int); - void h_FSCAL_R(Instruction&, int); - void h_FMUL_R(Instruction&, int); - void h_FDIV_M(Instruction&, int); - void h_FSQRT_R(Instruction&, int); - void h_CBRANCH(Instruction&, int); - void h_CFROUND(Instruction&, int); - void h_ISTORE(Instruction&, int); - void h_NOP(Instruction&, int); - - static InstructionGenerator engine[256]; - std::stringstream asmCode; - int registerUsage[RegistersCount]; - }; -} \ No newline at end of file diff --git a/external/src/randomx/src/blake2/blake2-impl.h b/external/src/randomx/src/blake2/blake2-impl.h deleted file mode 100644 index 617f7c8..0000000 --- a/external/src/randomx/src/blake2/blake2-impl.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef PORTABLE_BLAKE2_IMPL_H -#define PORTABLE_BLAKE2_IMPL_H - -#include - -#include "endian.h" - -static FORCE_INLINE uint64_t load48(const void *src) { - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - return w; -} - -static FORCE_INLINE void store48(void *dst, uint64_t w) { - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -} - -static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { - return (w >> c) | (w << (32 - c)); -} - -static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { - return (w >> c) | (w << (64 - c)); -} - -#endif diff --git a/external/src/randomx/src/blake2/blake2.h b/external/src/randomx/src/blake2/blake2.h deleted file mode 100644 index 3d15be1..0000000 --- a/external/src/randomx/src/blake2/blake2.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef PORTABLE_BLAKE2_H -#define PORTABLE_BLAKE2_H - -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif - - enum blake2b_constant { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_OUTBYTES = 64, - BLAKE2B_KEYBYTES = 64, - BLAKE2B_SALTBYTES = 16, - BLAKE2B_PERSONALBYTES = 16 - }; - -#pragma pack(push, 1) - typedef struct __blake2b_param { - uint8_t digest_length; /* 1 */ - uint8_t key_length; /* 2 */ - uint8_t fanout; /* 3 */ - uint8_t depth; /* 4 */ - uint32_t leaf_length; /* 8 */ - uint64_t node_offset; /* 16 */ - uint8_t node_depth; /* 17 */ - uint8_t inner_length; /* 18 */ - uint8_t reserved[14]; /* 32 */ - uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ - uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ - } blake2b_param; -#pragma pack(pop) - - typedef struct __blake2b_state { - uint64_t h[8]; - uint64_t t[2]; - uint64_t f[2]; - uint8_t buf[BLAKE2B_BLOCKBYTES]; - unsigned buflen; - unsigned outlen; - uint8_t last_node; - } blake2b_state; - - /* Ensure param structs have not been wrongly padded */ - /* Poor man's static_assert */ - enum { - blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), - blake2_size_check_2 = - 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) - }; - - //randomx namespace -#define blake2b_init randomx_blake2b_init -#define blake2b_init_key randomx_blake2b_init_key -#define blake2b_init_param randomx_blake2b_init_param -#define blake2b_update randomx_blake2b_update -#define blake2b_final randomx_blake2b_final -#define blake2b randomx_blake2b -#define blake2b_long randomx_blake2b_long - - /* Streaming API */ - int blake2b_init(blake2b_state *S, size_t outlen); - int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen); - int blake2b_init_param(blake2b_state *S, 
const blake2b_param *P); - int blake2b_update(blake2b_state *S, const void *in, size_t inlen); - int blake2b_final(blake2b_state *S, void *out, size_t outlen); - - /* Simple API */ - int blake2b(void *out, size_t outlen, const void *in, size_t inlen, - const void *key, size_t keylen); - - /* Argon2 Team - Begin Code */ - int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); - /* Argon2 Team - End Code */ - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/external/src/randomx/src/blake2/blake2b.c b/external/src/randomx/src/blake2/blake2b.c deleted file mode 100644 index b9f1b56..0000000 --- a/external/src/randomx/src/blake2/blake2b.c +++ /dev/null @@ -1,409 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -static const uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; - -static const unsigned int blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) { - S->f[1] = (uint64_t)-1; -} - -static 
FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) { - if (S->last_node) { - blake2b_set_lastnode(S); - } - S->f[0] = (uint64_t)-1; -} - -static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S, - uint64_t inc) { - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - -static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) { - //clear_internal_memory(S, sizeof(*S)); /* wipe */ - blake2b_set_lastblock(S); /* invalidate for further use */ -} - -static FORCE_INLINE void blake2b_init0(blake2b_state *S) { - memset(S, 0, sizeof(*S)); - memcpy(S->h, blake2b_IV, sizeof(S->h)); -} - -int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { - const unsigned char *p = (const unsigned char *)P; - unsigned int i; - - if (NULL == P || NULL == S) { - return -1; - } - - blake2b_init0(S); - /* IV XOR Parameter Block */ - for (i = 0; i < 8; ++i) { - S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); - } - S->outlen = P->digest_length; - return 0; -} - -/* Sequential blake2b initialization */ -int blake2b_init(blake2b_state *S, size_t outlen) { - blake2b_param P; - - if (S == NULL) { - return -1; - } - - if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - /* Setup Parameter Block for unkeyed BLAKE2 */ - P.digest_length = (uint8_t)outlen; - P.key_length = 0; - P.fanout = 1; - P.depth = 1; - P.leaf_length = 0; - P.node_offset = 0; - P.node_depth = 0; - P.inner_length = 0; - memset(P.reserved, 0, sizeof(P.reserved)); - memset(P.salt, 0, sizeof(P.salt)); - memset(P.personal, 0, sizeof(P.personal)); - - return blake2b_init_param(S, &P); -} - -int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) { - blake2b_param P; - - if (S == NULL) { - return -1; - } - - if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) { - blake2b_invalidate_state(S); - return -1; - } - - 
/* Setup Parameter Block for keyed BLAKE2 */ - P.digest_length = (uint8_t)outlen; - P.key_length = (uint8_t)keylen; - P.fanout = 1; - P.depth = 1; - P.leaf_length = 0; - P.node_offset = 0; - P.node_depth = 0; - P.inner_length = 0; - memset(P.reserved, 0, sizeof(P.reserved)); - memset(P.salt, 0, sizeof(P.salt)); - memset(P.personal, 0, sizeof(P.personal)); - - if (blake2b_init_param(S, &P) < 0) { - blake2b_invalidate_state(S); - return -1; - } - - { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset(block, 0, BLAKE2B_BLOCKBYTES); - memcpy(block, key, keylen); - blake2b_update(S, block, BLAKE2B_BLOCKBYTES); - /* Burn the key from stack */ - //clear_internal_memory(block, BLAKE2B_BLOCKBYTES); - } - return 0; -} - -static void blake2b_compress(blake2b_state *S, const uint8_t *block) { - uint64_t m[16]; - uint64_t v[16]; - unsigned int i, r; - - for (i = 0; i < 16; ++i) { - m[i] = load64(block + i * sizeof(m[i])); - } - - for (i = 0; i < 8; ++i) { - v[i] = S->h[i]; - } - - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5] ^ S->t[1]; - v[14] = blake2b_IV[6] ^ S->f[0]; - v[15] = blake2b_IV[7] ^ S->f[1]; - -#define G(r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(r) \ - do { \ - G(r, 0, v[0], v[4], v[8], v[12]); \ - G(r, 1, v[1], v[5], v[9], v[13]); \ - G(r, 2, v[2], v[6], v[10], v[14]); \ - G(r, 3, v[3], v[7], v[11], v[15]); \ - G(r, 4, v[0], v[5], v[10], v[15]); \ - G(r, 5, v[1], v[6], v[11], v[12]); \ - G(r, 6, v[2], v[7], v[8], v[13]); \ - G(r, 7, v[3], v[4], v[9], v[14]); \ - } while ((void)0, 0) - - for (r = 0; r < 12; ++r) { - ROUND(r); - } - - for (i = 0; i < 8; ++i) { - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - } - 
-#undef G -#undef ROUND -} - -int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { - const uint8_t *pin = (const uint8_t *)in; - - if (inlen == 0) { - return 0; - } - - /* Sanity check */ - if (S == NULL || in == NULL) { - return -1; - } - - /* Is this a reused state? */ - if (S->f[0] != 0) { - return -1; - } - - if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { - /* Complete current block */ - size_t left = S->buflen; - size_t fill = BLAKE2B_BLOCKBYTES - left; - memcpy(&S->buf[left], pin, fill); - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, S->buf); - S->buflen = 0; - inlen -= fill; - pin += fill; - /* Avoid buffer copies when possible */ - while (inlen > BLAKE2B_BLOCKBYTES) { - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, pin); - inlen -= BLAKE2B_BLOCKBYTES; - pin += BLAKE2B_BLOCKBYTES; - } - } - memcpy(&S->buf[S->buflen], pin, inlen); - S->buflen += (unsigned int)inlen; - return 0; -} - -int blake2b_final(blake2b_state *S, void *out, size_t outlen) { - uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 }; - unsigned int i; - - /* Sanity checks */ - if (S == NULL || out == NULL || outlen < S->outlen) { - return -1; - } - - /* Is this a reused state? 
*/ - if (S->f[0] != 0) { - return -1; - } - - blake2b_increment_counter(S, S->buflen); - blake2b_set_lastblock(S); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf); - - for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ - store64(buffer + sizeof(S->h[i]) * i, S->h[i]); - } - - memcpy(out, buffer, S->outlen); - //clear_internal_memory(buffer, sizeof(buffer)); - //clear_internal_memory(S->buf, sizeof(S->buf)); - //clear_internal_memory(S->h, sizeof(S->h)); - return 0; -} - -int blake2b(void *out, size_t outlen, const void *in, size_t inlen, - const void *key, size_t keylen) { - blake2b_state S; - int ret = -1; - - /* Verify parameters */ - if (NULL == in && inlen > 0) { - goto fail; - } - - if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) { - goto fail; - } - - if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) { - goto fail; - } - - if (keylen > 0) { - if (blake2b_init_key(&S, outlen, key, keylen) < 0) { - goto fail; - } - } - else { - if (blake2b_init(&S, outlen) < 0) { - goto fail; - } - } - - if (blake2b_update(&S, in, inlen) < 0) { - goto fail; - } - ret = blake2b_final(&S, out, outlen); - -fail: - //clear_internal_memory(&S, sizeof(S)); - return ret; -} - -/* Argon2 Team - Begin Code */ -int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { - uint8_t *out = (uint8_t *)pout; - blake2b_state blake_state; - uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 }; - int ret = -1; - - if (outlen > UINT32_MAX) { - goto fail; - } - - /* Ensure little-endian byte order! 
*/ - store32(outlen_bytes, (uint32_t)outlen); - -#define TRY(statement) \ - do { \ - ret = statement; \ - if (ret < 0) { \ - goto fail; \ - } \ - } while ((void)0, 0) - - if (outlen <= BLAKE2B_OUTBYTES) { - TRY(blake2b_init(&blake_state, outlen)); - TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); - TRY(blake2b_update(&blake_state, in, inlen)); - TRY(blake2b_final(&blake_state, out, outlen)); - } - else { - uint32_t toproduce; - uint8_t out_buffer[BLAKE2B_OUTBYTES]; - uint8_t in_buffer[BLAKE2B_OUTBYTES]; - TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES)); - TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); - TRY(blake2b_update(&blake_state, in, inlen)); - TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES)); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, - BLAKE2B_OUTBYTES, NULL, 0)); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL, - 0)); - memcpy(out, out_buffer, toproduce); - } -fail: - //clear_internal_memory(&blake_state, sizeof(blake_state)); - return ret; -#undef TRY -} -/* Argon2 Team - End Code */ - diff --git a/external/src/randomx/src/blake2/blamka-round-avx2.h b/external/src/randomx/src/blake2/blamka-round-avx2.h deleted file mode 100644 index 4838261..0000000 --- a/external/src/randomx/src/blake2/blamka-round-avx2.h +++ /dev/null @@ -1,189 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef BLAKE_ROUND_MKA_OPT_H -#define BLAKE_ROUND_MKA_OPT_H - -#include "blake2-impl.h" - -#ifdef __GNUC__ -#include -#else -#include -#endif - -#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)) -#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) - -#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i ml = _mm256_mul_epu32(A0, B0); \ - ml = _mm256_add_epi64(ml, ml); \ - A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \ - D0 = _mm256_xor_si256(D0, A0); \ - D0 = rotr32(D0); \ - \ - ml = _mm256_mul_epu32(C0, D0); \ - ml = _mm256_add_epi64(ml, ml); \ - C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \ - \ - B0 = _mm256_xor_si256(B0, C0); \ - B0 = rotr24(B0); \ - \ - ml = _mm256_mul_epu32(A1, B1); \ - ml = _mm256_add_epi64(ml, ml); \ - A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \ - D1 = _mm256_xor_si256(D1, A1); \ - D1 = rotr32(D1); \ - \ - ml = _mm256_mul_epu32(C1, D1); \ - ml = _mm256_add_epi64(ml, ml); \ - C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \ - \ - B1 = _mm256_xor_si256(B1, C1); \ - B1 = rotr24(B1); \ - } while((void)0, 0); - -#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i ml = _mm256_mul_epu32(A0, B0); \ - ml = _mm256_add_epi64(ml, ml); \ - A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \ - D0 = _mm256_xor_si256(D0, A0); \ - D0 = rotr16(D0); \ - \ - ml = 
_mm256_mul_epu32(C0, D0); \ - ml = _mm256_add_epi64(ml, ml); \ - C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \ - B0 = _mm256_xor_si256(B0, C0); \ - B0 = rotr63(B0); \ - \ - ml = _mm256_mul_epu32(A1, B1); \ - ml = _mm256_add_epi64(ml, ml); \ - A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \ - D1 = _mm256_xor_si256(D1, A1); \ - D1 = rotr16(D1); \ - \ - ml = _mm256_mul_epu32(C1, D1); \ - ml = _mm256_add_epi64(ml, ml); \ - C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \ - B1 = _mm256_xor_si256(B1, C1); \ - B1 = rotr63(B1); \ - } while((void)0, 0); - -#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ - \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ - C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while((void)0, 0); - -#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ - B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - \ - tmp1 = C0; \ - C0 = C1; \ - C1 = tmp1; \ - \ - tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ - tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ - D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - } while(0); - -#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ - C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ - D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ - \ - B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ - C1 = _mm256_permute4x64_epi64(C1, 
_MM_SHUFFLE(1, 0, 3, 2)); \ - D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while((void)0, 0); - -#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ - __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ - B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - \ - tmp1 = C0; \ - C0 = C1; \ - C1 = tmp1; \ - \ - tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \ - tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \ - D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ - D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ - } while((void)0, 0); - -#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \ - do{ \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ - } while((void)0, 0); - -#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - do{ \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ - \ - UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ - } while((void)0, 0); - -#endif /* BLAKE_ROUND_MKA_OPT_H */ diff --git a/external/src/randomx/src/blake2/blamka-round-ref.h b/external/src/randomx/src/blake2/blamka-round-ref.h deleted file mode 100644 index f1fb50b..0000000 --- a/external/src/randomx/src/blake2/blamka-round-ref.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef BLAKE_ROUND_MKA_H -#define BLAKE_ROUND_MKA_H - -#include "blake2.h" -#include "blake2-impl.h" - - /* designed by the Lyra PHC team */ -static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { - const uint64_t m = UINT64_C(0xFFFFFFFF); - const uint64_t xy = (x & m) * (y & m); - return x + y + 2 * xy; -} - -#define G(a, b, c, d) \ - do { \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 32); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 24); \ - a = fBlaMka(a, b); \ - d = rotr64(d ^ a, 16); \ - c = fBlaMka(c, d); \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ - v12, v13, v14, v15) \ - do { \ - G(v0, v4, v8, v12); \ - G(v1, v5, v9, v13); \ - G(v2, v6, v10, v14); \ - G(v3, v7, v11, v15); \ - G(v0, v5, v10, v15); \ - G(v1, v6, v11, v12); \ - G(v2, v7, v8, v13); \ - G(v3, v4, v9, v14); \ - } while ((void)0, 0) - -#endif diff --git a/external/src/randomx/src/blake2/blamka-round-ssse3.h b/external/src/randomx/src/blake2/blamka-round-ssse3.h deleted file mode 100644 index f2d3b5d..0000000 --- a/external/src/randomx/src/blake2/blamka-round-ssse3.h +++ /dev/null @@ -1,162 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#ifndef BLAKE_ROUND_MKA_OPT_H -#define BLAKE_ROUND_MKA_OPT_H - -#include "blake2-impl.h" - -#ifdef __GNUC__ -#include -#else -#include -#endif - -#ifdef _mm_roti_epi64 //clang defines it using the XOP instruction set -#undef _mm_roti_epi64 -#endif - -#define r16 \ - (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -#define r24 \ - (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) - -static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) { - const __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -32); \ - D1 = _mm_roti_epi64(D1, -32); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -24); \ - B1 = _mm_roti_epi64(B1, -24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -16); \ - D1 = _mm_roti_epi64(D1, -16); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -63); \ - B1 = _mm_roti_epi64(B1, -63); \ - } while ((void)0, 0) - -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define BLAKE2_ROUND(A0, 
A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - - -#endif /* BLAKE_ROUND_MKA_OPT_H */ diff --git a/external/src/randomx/src/blake2/endian.h b/external/src/randomx/src/blake2/endian.h deleted file mode 100644 index c7afed2..0000000 --- a/external/src/randomx/src/blake2/endian.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include - -#if defined(_MSC_VER) -#define FORCE_INLINE __inline -#elif defined(__GNUC__) || defined(__clang__) -#define FORCE_INLINE __inline__ -#else -#define FORCE_INLINE -#endif - - /* Argon2 Team - Begin Code */ - /* - Not an exhaustive list, but should cover the majority of modern platforms - Additionally, the code will always be correct---this is only a performance - tweak. 
- */ -#if (defined(__BYTE_ORDER__) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ - defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ - defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ - defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ - defined(_M_ARM) -#define NATIVE_LITTLE_ENDIAN -#endif - /* Argon2 Team - End Code */ - -static FORCE_INLINE uint32_t load32(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - uint32_t w; - memcpy(&w, src, sizeof w); - return w; -#else - const uint8_t *p = (const uint8_t *)src; - uint32_t w = *p++; - w |= (uint32_t)(*p++) << 8; - w |= (uint32_t)(*p++) << 16; - w |= (uint32_t)(*p++) << 24; - return w; -#endif -} - -static FORCE_INLINE uint64_t load64_native(const void *src) { - uint64_t w; - memcpy(&w, src, sizeof w); - return w; -} - -static FORCE_INLINE uint64_t load64(const void *src) { -#if defined(NATIVE_LITTLE_ENDIAN) - return load64_native(src); -#else - const uint8_t *p = (const uint8_t *)src; - uint64_t w = *p++; - w |= (uint64_t)(*p++) << 8; - w |= (uint64_t)(*p++) << 16; - w |= (uint64_t)(*p++) << 24; - w |= (uint64_t)(*p++) << 32; - w |= (uint64_t)(*p++) << 40; - w |= (uint64_t)(*p++) << 48; - w |= (uint64_t)(*p++) << 56; - return w; -#endif -} - -static FORCE_INLINE void store32(void *dst, uint32_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy(dst, &w, sizeof w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} - -static FORCE_INLINE void store64_native(void *dst, uint64_t w) { - memcpy(dst, &w, sizeof w); -} - -static FORCE_INLINE void store64(void *dst, uint64_t w) { -#if defined(NATIVE_LITTLE_ENDIAN) - store64_native(dst, w); -#else - uint8_t *p = (uint8_t *)dst; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - 
*p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; - w >>= 8; - *p++ = (uint8_t)w; -#endif -} diff --git a/external/src/randomx/src/blake2_generator.cpp b/external/src/randomx/src/blake2_generator.cpp deleted file mode 100644 index 3f2d028..0000000 --- a/external/src/randomx/src/blake2_generator.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include -#include "blake2/blake2.h" -#include "blake2/endian.h" -#include "blake2_generator.hpp" - -namespace randomx { - - constexpr int maxSeedSize = 60; - - Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) { - memset(data, 0, sizeof(data)); - memcpy(data, seed, seedSize > maxSeedSize ? maxSeedSize : seedSize); - store32(&data[maxSeedSize], nonce); - } - - uint8_t Blake2Generator::getByte() { - checkData(1); - return data[dataIndex++]; - } - - uint32_t Blake2Generator::getUInt32() { - checkData(4); - auto ret = load32(&data[dataIndex]); - dataIndex += 4; - return ret; - } - - void Blake2Generator::checkData(const size_t bytesNeeded) { - if (dataIndex + bytesNeeded > sizeof(data)) { - blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); - dataIndex = 0; - } - } -} \ No newline at end of file diff --git a/external/src/randomx/src/blake2_generator.hpp b/external/src/randomx/src/blake2_generator.hpp deleted file mode 100644 index 5e7f61f..0000000 --- a/external/src/randomx/src/blake2_generator.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include - -namespace randomx { - - class Blake2Generator { - public: - Blake2Generator(const void* seed, size_t seedSize, int nonce = 0); - uint8_t getByte(); - uint32_t getUInt32(); - private: - void checkData(const size_t); - - uint8_t data[64]; - size_t dataIndex; - }; -} \ No newline at end of file diff --git a/external/src/randomx/src/bytecode_machine.cpp b/external/src/randomx/src/bytecode_machine.cpp deleted file mode 100644 index 7d8e902..0000000 --- a/external/src/randomx/src/bytecode_machine.cpp +++ /dev/null @@ -1,482 +0,0 @@ -/* -Copyright (c) 2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "bytecode_machine.hpp" -#include "reciprocal.h" - -namespace randomx { - - const int_reg_t BytecodeMachine::zero = 0; - -#define INSTR_CASE(x) case InstructionType::x: \ - exe_ ## x(ibc, pc, scratchpad, config); \ - break; - - void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) { - switch (ibc.type) - { - INSTR_CASE(IADD_RS) - INSTR_CASE(IADD_M) - INSTR_CASE(ISUB_R) - INSTR_CASE(ISUB_M) - INSTR_CASE(IMUL_R) - INSTR_CASE(IMUL_M) - INSTR_CASE(IMULH_R) - INSTR_CASE(IMULH_M) - INSTR_CASE(ISMULH_R) - INSTR_CASE(ISMULH_M) - INSTR_CASE(INEG_R) - INSTR_CASE(IXOR_R) - INSTR_CASE(IXOR_M) - INSTR_CASE(IROR_R) - INSTR_CASE(IROL_R) - INSTR_CASE(ISWAP_R) - INSTR_CASE(FSWAP_R) - INSTR_CASE(FADD_R) - INSTR_CASE(FADD_M) - INSTR_CASE(FSUB_R) - INSTR_CASE(FSUB_M) - INSTR_CASE(FSCAL_R) - INSTR_CASE(FMUL_R) - INSTR_CASE(FDIV_M) - INSTR_CASE(FSQRT_R) - INSTR_CASE(CBRANCH) - INSTR_CASE(CFROUND) - INSTR_CASE(ISTORE) - - case InstructionType::NOP: - break; - - case InstructionType::IMUL_RCP: //executed as IMUL_R - 
default: - UNREACHABLE; - } - } - - void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) { - int opcode = instr.opcode; - - if (opcode < ceil_IADD_RS) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IADD_RS; - ibc.idst = &nreg->r[dst]; - if (dst != RegisterNeedsDisplacement) { - ibc.isrc = &nreg->r[src]; - ibc.shift = instr.getModShift(); - ibc.imm = 0; - } - else { - ibc.isrc = &nreg->r[src]; - ibc.shift = instr.getModShift(); - ibc.imm = signExtend2sCompl(instr.getImm32()); - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IADD_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IADD_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_ISUB_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISUB_R; - ibc.idst = &nreg->r[dst]; - if (src != dst) { - ibc.isrc = &nreg->r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_ISUB_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISUB_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IMUL_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMUL_R; - ibc.idst = &nreg->r[dst]; - if (src != dst) { - ibc.isrc = &nreg->r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IMUL_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMUL_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IMULH_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMULH_R; - ibc.idst = &nreg->r[dst]; - ibc.isrc = &nreg->r[src]; - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IMULH_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IMULH_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_ISMULH_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISMULH_R; - ibc.idst = &nreg->r[dst]; - ibc.isrc = &nreg->r[src]; - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_ISMULH_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISMULH_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IMUL_RCP) { - uint64_t divisor = instr.getImm32(); - if (!isZeroOrPowerOf2(divisor)) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::IMUL_R; - ibc.idst = &nreg->r[dst]; - ibc.imm = randomx_reciprocal(divisor); - ibc.isrc = &ibc.imm; - registerUsage[dst] = i; - } - else { - ibc.type = InstructionType::NOP; - } - return; - } - - if (opcode < ceil_INEG_R) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::INEG_R; - ibc.idst = &nreg->r[dst]; - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IXOR_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IXOR_R; - ibc.idst = &nreg->r[dst]; - if (src != dst) { - ibc.isrc = &nreg->r[src]; - } - else { - ibc.imm = signExtend2sCompl(instr.getImm32()); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IXOR_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IXOR_M; - ibc.idst = &nreg->r[dst]; - ibc.imm = 
signExtend2sCompl(instr.getImm32()); - if (src != dst) { - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - ibc.isrc = &zero; - ibc.memMask = ScratchpadL3Mask; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IROR_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IROR_R; - ibc.idst = &nreg->r[dst]; - if (src != dst) { - ibc.isrc = &nreg->r[src]; - } - else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_IROL_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IROL_R; - ibc.idst = &nreg->r[dst]; - if (src != dst) { - ibc.isrc = &nreg->r[src]; - } - else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; - } - registerUsage[dst] = i; - return; - } - - if (opcode < ceil_ISWAP_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - if (src != dst) { - ibc.idst = &nreg->r[dst]; - ibc.isrc = &nreg->r[src]; - ibc.type = InstructionType::ISWAP_R; - registerUsage[dst] = i; - registerUsage[src] = i; - } - else { - ibc.type = InstructionType::NOP; - } - return; - } - - if (opcode < ceil_FSWAP_R) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::FSWAP_R; - if (dst < RegisterCountFlt) - ibc.fdst = &nreg->f[dst]; - else - ibc.fdst = &nreg->e[dst - RegisterCountFlt]; - return; - } - - if (opcode < ceil_FADD_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FADD_R; - ibc.fdst = &nreg->f[dst]; - ibc.fsrc = &nreg->a[src]; - return; - } - - if (opcode < ceil_FADD_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FADD_M; - ibc.fdst = &nreg->f[dst]; - ibc.isrc = &nreg->r[src]; - ibc.memMask = 
(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - return; - } - - if (opcode < ceil_FSUB_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FSUB_R; - ibc.fdst = &nreg->f[dst]; - ibc.fsrc = &nreg->a[src]; - return; - } - - if (opcode < ceil_FSUB_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FSUB_M; - ibc.fdst = &nreg->f[dst]; - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - return; - } - - if (opcode < ceil_FSCAL_R) { - auto dst = instr.dst % RegisterCountFlt; - ibc.fdst = &nreg->f[dst]; - ibc.type = InstructionType::FSCAL_R; - return; - } - - if (opcode < ceil_FMUL_R) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegisterCountFlt; - ibc.type = InstructionType::FMUL_R; - ibc.fdst = &nreg->e[dst]; - ibc.fsrc = &nreg->a[src]; - return; - } - - if (opcode < ceil_FDIV_M) { - auto dst = instr.dst % RegisterCountFlt; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::FDIV_M; - ibc.fdst = &nreg->e[dst]; - ibc.isrc = &nreg->r[src]; - ibc.memMask = (instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - ibc.imm = signExtend2sCompl(instr.getImm32()); - return; - } - - if (opcode < ceil_FSQRT_R) { - auto dst = instr.dst % RegisterCountFlt; - ibc.type = InstructionType::FSQRT_R; - ibc.fdst = &nreg->e[dst]; - return; - } - - if (opcode < ceil_CBRANCH) { - ibc.type = InstructionType::CBRANCH; - //jump condition - int creg = instr.dst % RegistersCount; - ibc.idst = &nreg->r[creg]; - ibc.target = registerUsage[creg]; - int shift = instr.getModCond() + ConditionOffset; - ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); - if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 - ibc.imm &= ~(1ULL << (shift - 1)); - ibc.memMask = ConditionMask << shift; - //mark all registers as used - for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j] = i; - } - return; - } - - if (opcode < ceil_CFROUND) { - auto src = instr.src % RegistersCount; - ibc.isrc = &nreg->r[src]; - ibc.type = InstructionType::CFROUND; - ibc.imm = instr.getImm32() & 63; - return; - } - - if (opcode < ceil_ISTORE) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::ISTORE; - ibc.idst = &nreg->r[dst]; - ibc.isrc = &nreg->r[src]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - if (instr.getModCond() < StoreL3Condition) - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - else - ibc.memMask = ScratchpadL3Mask; - return; - } - - if (opcode < ceil_NOP) { - ibc.type = InstructionType::NOP; - return; - } - - UNREACHABLE; - } -} diff --git a/external/src/randomx/src/bytecode_machine.hpp b/external/src/randomx/src/bytecode_machine.hpp deleted file mode 100644 index 5e82e0d..0000000 --- a/external/src/randomx/src/bytecode_machine.hpp +++ /dev/null @@ -1,322 +0,0 @@ -/* -Copyright (c) 2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include "common.hpp" -#include "intrin_portable.h" -#include "instruction.hpp" -#include "program.hpp" - -namespace randomx { - - //register file in machine byte order - struct NativeRegisterFile { - int_reg_t r[RegistersCount] = { 0 }; - rx_vec_f128 f[RegisterCountFlt]; - rx_vec_f128 e[RegisterCountFlt]; - rx_vec_f128 a[RegisterCountFlt]; - }; - - struct InstructionByteCode { - union { - int_reg_t* idst; - rx_vec_f128* fdst; - }; - union { - const int_reg_t* isrc; - const rx_vec_f128* fsrc; - }; - union { - uint64_t imm; - int64_t simm; - }; - InstructionType type; - union { - int16_t target; - uint16_t shift; - }; - uint32_t memMask; - }; - -#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr; - constexpr int ceil_NULL = 0; - OPCODE_CEIL_DECLARE(IADD_RS, NULL); - OPCODE_CEIL_DECLARE(IADD_M, IADD_RS); - OPCODE_CEIL_DECLARE(ISUB_R, IADD_M); - OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R); - OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M); - OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R); - OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M); - OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R); - OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M); - OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R); - OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M); - OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP); - OPCODE_CEIL_DECLARE(IXOR_R, INEG_R); - OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R); - OPCODE_CEIL_DECLARE(IROR_R, IXOR_M); - OPCODE_CEIL_DECLARE(IROL_R, IROR_R); - OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R); - OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R); - OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R); - OPCODE_CEIL_DECLARE(FADD_M, FADD_R); - OPCODE_CEIL_DECLARE(FSUB_R, FADD_M); - OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R); - OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M); - OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R); - OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R); - OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M); - OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R); - OPCODE_CEIL_DECLARE(CFROUND, CBRANCH); - OPCODE_CEIL_DECLARE(ISTORE, CFROUND); - 
OPCODE_CEIL_DECLARE(NOP, ISTORE); -#undef OPCODE_CEIL_DECLARE - -#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config -#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc - - class BytecodeMachine; - - typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS); - - class BytecodeMachine { - public: - void beginCompilation(NativeRegisterFile& regFile) { - for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i] = -1; - } - nreg = ®File; - } - - void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) { - beginCompilation(regFile); - for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { - auto& instr = program(i); - auto& ibc = bytecode[i]; - compileInstruction(instr, i, ibc); - } - } - - static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) { - for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) { - auto& ibc = bytecode[pc]; - executeInstruction(ibc, pc, scratchpad, config); - } - } - - void compileInstruction(RANDOMX_GEN_ARGS) -#ifdef RANDOMX_GEN_TABLE - { - auto generator = genTable[instr.opcode]; - (this->*generator)(instr, i, ibc); - } -#else - ; -#endif - - static void executeInstruction(RANDOMX_EXE_ARGS); - - static void exe_IADD_RS(RANDOMX_EXE_ARGS) { - *ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm; - } - - static void exe_IADD_M(RANDOMX_EXE_ARGS) { - *ibc.idst += load64(getScratchpadAddress(ibc, scratchpad)); - } - - static void exe_ISUB_R(RANDOMX_EXE_ARGS) { - *ibc.idst -= *ibc.isrc; - } - - static void exe_ISUB_M(RANDOMX_EXE_ARGS) { - *ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad)); - } - - static void exe_IMUL_R(RANDOMX_EXE_ARGS) { - *ibc.idst *= *ibc.isrc; - } - - static void exe_IMUL_M(RANDOMX_EXE_ARGS) { - *ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad)); - } - - static void 
exe_IMULH_R(RANDOMX_EXE_ARGS) { - *ibc.idst = mulh(*ibc.idst, *ibc.isrc); - } - - static void exe_IMULH_M(RANDOMX_EXE_ARGS) { - *ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad))); - } - - static void exe_ISMULH_R(RANDOMX_EXE_ARGS) { - *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc)); - } - - static void exe_ISMULH_M(RANDOMX_EXE_ARGS) { - *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad)))); - } - - static void exe_INEG_R(RANDOMX_EXE_ARGS) { - *ibc.idst = ~(*ibc.idst) + 1; //two's complement negative - } - - static void exe_IXOR_R(RANDOMX_EXE_ARGS) { - *ibc.idst ^= *ibc.isrc; - } - - static void exe_IXOR_M(RANDOMX_EXE_ARGS) { - *ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad)); - } - - static void exe_IROR_R(RANDOMX_EXE_ARGS) { - *ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63); - } - - static void exe_IROL_R(RANDOMX_EXE_ARGS) { - *ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63); - } - - static void exe_ISWAP_R(RANDOMX_EXE_ARGS) { - int_reg_t temp = *ibc.isrc; - *(int_reg_t*)ibc.isrc = *ibc.idst; - *ibc.idst = temp; - } - - static void exe_FSWAP_R(RANDOMX_EXE_ARGS) { - *ibc.fdst = rx_swap_vec_f128(*ibc.fdst); - } - - static void exe_FADD_R(RANDOMX_EXE_ARGS) { - *ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc); - } - - static void exe_FADD_M(RANDOMX_EXE_ARGS) { - rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)); - *ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc); - } - - static void exe_FSUB_R(RANDOMX_EXE_ARGS) { - *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc); - } - - static void exe_FSUB_M(RANDOMX_EXE_ARGS) { - rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)); - *ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc); - } - - static void exe_FSCAL_R(RANDOMX_EXE_ARGS) { - const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000); - *ibc.fdst = 
rx_xor_vec_f128(*ibc.fdst, mask); - } - - static void exe_FMUL_R(RANDOMX_EXE_ARGS) { - *ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc); - } - - static void exe_FDIV_M(RANDOMX_EXE_ARGS) { - rx_vec_f128 fsrc = maskRegisterExponentMantissa( - config, - rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad)) - ); - *ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc); - } - - static void exe_FSQRT_R(RANDOMX_EXE_ARGS) { - *ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst); - } - - static void exe_CBRANCH(RANDOMX_EXE_ARGS) { - *ibc.idst += ibc.imm; - if ((*ibc.idst & ibc.memMask) == 0) { - pc = ibc.target; - } - } - - static void exe_CFROUND(RANDOMX_EXE_ARGS) { - rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4); - } - - static void exe_ISTORE(RANDOMX_EXE_ARGS) { - store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); - } - protected: - static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) { - const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask); - const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask); - x = rx_and_vec_f128(x, xmantissaMask); - x = rx_or_vec_f128(x, xexponentMask); - return x; - } - - private: - static const int_reg_t zero; - int registerUsage[RegistersCount]; - NativeRegisterFile* nreg; - - static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) { - uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; - return scratchpad + addr; - } - -#ifdef RANDOMX_GEN_TABLE - static InstructionGenBytecode genTable[256]; - - void gen_IADD_RS(RANDOMX_GEN_ARGS); - void gen_IADD_M(RANDOMX_GEN_ARGS); - void gen_ISUB_R(RANDOMX_GEN_ARGS); - void gen_ISUB_M(RANDOMX_GEN_ARGS); - void gen_IMUL_R(RANDOMX_GEN_ARGS); - void gen_IMUL_M(RANDOMX_GEN_ARGS); - void gen_IMULH_R(RANDOMX_GEN_ARGS); - void gen_IMULH_M(RANDOMX_GEN_ARGS); - void gen_ISMULH_R(RANDOMX_GEN_ARGS); - void gen_ISMULH_M(RANDOMX_GEN_ARGS); - void 
gen_IMUL_RCP(RANDOMX_GEN_ARGS); - void gen_INEG_R(RANDOMX_GEN_ARGS); - void gen_IXOR_R(RANDOMX_GEN_ARGS); - void gen_IXOR_M(RANDOMX_GEN_ARGS); - void gen_IROR_R(RANDOMX_GEN_ARGS); - void gen_IROL_R(RANDOMX_GEN_ARGS); - void gen_ISWAP_R(RANDOMX_GEN_ARGS); - void gen_FSWAP_R(RANDOMX_GEN_ARGS); - void gen_FADD_R(RANDOMX_GEN_ARGS); - void gen_FADD_M(RANDOMX_GEN_ARGS); - void gen_FSUB_R(RANDOMX_GEN_ARGS); - void gen_FSUB_M(RANDOMX_GEN_ARGS); - void gen_FSCAL_R(RANDOMX_GEN_ARGS); - void gen_FMUL_R(RANDOMX_GEN_ARGS); - void gen_FDIV_M(RANDOMX_GEN_ARGS); - void gen_FSQRT_R(RANDOMX_GEN_ARGS); - void gen_CBRANCH(RANDOMX_GEN_ARGS); - void gen_CFROUND(RANDOMX_GEN_ARGS); - void gen_ISTORE(RANDOMX_GEN_ARGS); - void gen_NOP(RANDOMX_GEN_ARGS); -#endif - }; -} diff --git a/external/src/randomx/src/common.hpp b/external/src/randomx/src/common.hpp deleted file mode 100644 index a77feb3..0000000 --- a/external/src/randomx/src/common.hpp +++ /dev/null @@ -1,187 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include -#include -#include "blake2/endian.h" -#include "configuration.h" -#include "randomx.h" - -namespace randomx { - - static_assert(RANDOMX_ARGON_MEMORY >= 8, "RANDOMX_ARGON_MEMORY must be at least 8."); - static_assert(RANDOMX_ARGON_MEMORY <= 2097152, "RANDOMX_ARGON_MEMORY must not exceed 2097152."); - static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); - static_assert(RANDOMX_ARGON_ITERATIONS > 0 && RANDOMX_ARGON_ITERATIONS < UINT32_MAX, "RANDOMX_ARGON_ITERATIONS must be a positive 32-bit integer."); - static_assert(RANDOMX_ARGON_LANES > 0 && RANDOMX_ARGON_LANES <= 16777215, "RANDOMX_ARGON_LANES out of range"); - static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64."); - static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2."); - static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296."); - static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); - static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB."); - static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); - static_assert(RANDOMX_PROGRAM_SIZE <= 32768, 
"RANDOMX_PROGRAM_SIZE must not exceed 32768"); - static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); - static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); - static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2."); - static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2."); - static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2."); - static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); - static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64."); - static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); - static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); - static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0"); - static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000"); - static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0."); - static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0."); - static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16."); - - constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ - RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ - RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \ - RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + 
RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \ - RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \ - RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \ - RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; - - static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); - - - constexpr uint32_t ArgonBlockSize = 1024; - constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1; - static_assert(ArgonSaltSize >= 8, "RANDOMX_ARGON_SALT must be at least 8 characters long"); - constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2; - constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; - constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; - constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); - constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; - constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; - constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; - constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); - constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET; - constexpr int StoreL3Condition = 14; - - //Prevent some unsafe configurations. 
-#ifndef RANDOMX_UNSAFE - static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs"); - static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes"); - static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies"); - static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy"); - static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead"); -#endif - -#ifdef TRACE - constexpr bool trace = true; -#else - constexpr bool trace = false; -#endif - -#ifndef UNREACHABLE -#ifdef __GNUC__ -#define UNREACHABLE __builtin_unreachable() -#elif _MSC_VER -#define UNREACHABLE __assume(false) -#else -#define UNREACHABLE -#endif -#endif - -#if defined(_M_X64) || defined(__x86_64__) - #define RANDOMX_HAVE_COMPILER 1 - class JitCompilerX86; - using JitCompiler = JitCompilerX86; -#elif defined(__aarch64__) - #define RANDOMX_HAVE_COMPILER 1 - class JitCompilerA64; - using JitCompiler = JitCompilerA64; -#else - #define RANDOMX_HAVE_COMPILER 0 - class JitCompilerFallback; - using JitCompiler = JitCompilerFallback; -#endif - - using addr_t = uint32_t; - - using int_reg_t = uint64_t; - - struct fpu_reg_t { - double lo; - double hi; - }; - - constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t); - constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t); - constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t); - constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8; - constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8; - constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16; - constexpr int ScratchpadL2Mask16 = 
(ScratchpadL2 / 2 - 1) * 16; - constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; - constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; - constexpr int RegistersCount = 8; - constexpr int RegisterCountFlt = RegistersCount / 2; - constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register - constexpr int RegisterNeedsSib = 4; //x86 r12 register - - inline bool isZeroOrPowerOf2(uint64_t x) { - return (x & (x - 1)) == 0; - } - - constexpr int mantissaSize = 52; - constexpr int exponentSize = 11; - constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; - constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; - constexpr int exponentBias = 1023; - constexpr int dynamicExponentBits = 4; - constexpr int staticExponentBits = 4; - constexpr uint64_t constExponentBits = 0x300; - constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1; - - struct MemoryRegisters { - addr_t mx, ma; - uint8_t* memory = nullptr; - }; - - //register file in little-endian byte order - struct RegisterFile { - int_reg_t r[RegistersCount]; - fpu_reg_t f[RegisterCountFlt]; - fpu_reg_t e[RegisterCountFlt]; - fpu_reg_t a[RegisterCountFlt]; - }; - - typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); - typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); - - typedef void(DatasetDeallocFunc)(randomx_dataset*); - typedef void(CacheDeallocFunc)(randomx_cache*); - typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t); -} diff --git a/external/src/randomx/src/configuration.h b/external/src/randomx/src/configuration.h deleted file mode 100644 index 84400dd..0000000 --- a/external/src/randomx/src/configuration.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
#pragma once

/* ---- Cache (Argon2d) ---------------------------------------------------- */

/* Cache size in KiB. Must be a power of 2. */
#define RANDOMX_ARGON_MEMORY         262144

/* Number of Argon2d iterations for Cache initialization. */
#define RANDOMX_ARGON_ITERATIONS     3

/* Number of parallel lanes for Cache initialization. */
#define RANDOMX_ARGON_LANES          1

/* Argon2d salt. */
#define RANDOMX_ARGON_SALT           "RandomX\x03"

/* Number of random Cache accesses per Dataset item. Minimum is 2. */
#define RANDOMX_CACHE_ACCESSES       8

/* Target latency for SuperscalarHash (in cycles of the reference CPU). */
#define RANDOMX_SUPERSCALAR_LATENCY  170

/* ---- Dataset ------------------------------------------------------------ */

/* Dataset base size in bytes. Must be a power of 2. */
#define RANDOMX_DATASET_BASE_SIZE    2147483648

/* Dataset extra size. Must be divisible by 64. */
#define RANDOMX_DATASET_EXTRA_SIZE   33554368

/* ---- Program / VM ------------------------------------------------------- */

/* Number of instructions in a RandomX program. Must be divisible by 8. */
#define RANDOMX_PROGRAM_SIZE         256

/* Number of iterations during VM execution. */
#define RANDOMX_PROGRAM_ITERATIONS   2048

/* Number of chained VM executions per hash. */
#define RANDOMX_PROGRAM_COUNT        8

/* ---- Scratchpad --------------------------------------------------------- */

/* Scratchpad L3 size in bytes. Must be a power of 2. */
#define RANDOMX_SCRATCHPAD_L3        2097152

/* Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3. */
#define RANDOMX_SCRATCHPAD_L2        262144

/* Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2. */
#define RANDOMX_SCRATCHPAD_L1        16384

/* ---- Conditional jumps -------------------------------------------------- */

/* Jump condition mask size in bits. */
#define RANDOMX_JUMP_BITS            8

/* Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16. */
#define RANDOMX_JUMP_OFFSET          8

/*
 * Instruction frequencies (per 256 opcodes).
 * The total of all RANDOMX_FREQ_* values must be 256.
 */

/* Integer instructions */
#define RANDOMX_FREQ_IADD_RS         16
#define RANDOMX_FREQ_IADD_M          7
#define RANDOMX_FREQ_ISUB_R          16
#define RANDOMX_FREQ_ISUB_M          7
#define RANDOMX_FREQ_IMUL_R          16
#define RANDOMX_FREQ_IMUL_M          4
#define RANDOMX_FREQ_IMULH_R         4
#define RANDOMX_FREQ_IMULH_M         1
#define RANDOMX_FREQ_ISMULH_R        4
#define RANDOMX_FREQ_ISMULH_M        1
#define RANDOMX_FREQ_IMUL_RCP        8
#define RANDOMX_FREQ_INEG_R          2
#define RANDOMX_FREQ_IXOR_R          15
#define RANDOMX_FREQ_IXOR_M          5
#define RANDOMX_FREQ_IROR_R          8
#define RANDOMX_FREQ_IROL_R          2
#define RANDOMX_FREQ_ISWAP_R         4

/* Floating point instructions */
#define RANDOMX_FREQ_FSWAP_R         4
#define RANDOMX_FREQ_FADD_R          16
#define RANDOMX_FREQ_FADD_M          5
#define RANDOMX_FREQ_FSUB_R          16
#define RANDOMX_FREQ_FSUB_M          5
#define RANDOMX_FREQ_FSCAL_R         6
#define RANDOMX_FREQ_FMUL_R          32
#define RANDOMX_FREQ_FDIV_M          4
#define RANDOMX_FREQ_FSQRT_R         6

/* Control instructions */
#define RANDOMX_FREQ_CBRANCH         25
#define RANDOMX_FREQ_CFROUND         1

/* Store instruction */
#define RANDOMX_FREQ_ISTORE          16

/* No-op instruction */
#define RANDOMX_FREQ_NOP             0
/*                                   ------
                                     256
*/
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "cpu.hpp" - -#if defined(_M_X64) || defined(__x86_64__) - #define HAVE_CPUID - #ifdef _WIN32 - #include - #define cpuid(info, x) __cpuidex(info, x, 0) - #else //GCC - #include - void cpuid(int info[4], int InfoType) { - __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); - } - #endif -#endif - -#if defined(HAVE_HWCAP) - #include - #include -#endif - -namespace randomx { - - Cpu::Cpu() : aes_(false), ssse3_(false), avx2_(false) { -#ifdef HAVE_CPUID - int info[4]; - cpuid(info, 0); - int nIds = info[0]; - if (nIds >= 0x00000001) { - cpuid(info, 0x00000001); - ssse3_ = (info[2] & (1 << 9)) != 0; - aes_ = (info[2] & (1 << 25)) != 0; - } - if (nIds >= 0x00000007) { - cpuid(info, 0x00000007); - avx2_ = (info[1] & (1 << 5)) != 0; - } -#elif defined(__aarch64__) - #if defined(HWCAP_AES) - long hwcaps = getauxval(AT_HWCAP); - aes_ = (hwcaps & HWCAP_AES) != 0; - #elif defined(__APPLE__) - aes_ = true; - #endif -#endif - //TODO POWER8 AES - } - -} diff --git a/external/src/randomx/src/cpu.hpp 
b/external/src/randomx/src/cpu.hpp deleted file mode 100644 index 516dd47..0000000 --- a/external/src/randomx/src/cpu.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace randomx {

	//Holds three CPU capability flags (AES, SSSE3, AVX2).
	//The constructor is only declared here; the accessors return the stored flags unchanged.
	class Cpu {
	public:
		Cpu();

		bool hasAes() const { return aes_; }
		bool hasSsse3() const { return ssse3_; }
		bool hasAvx2() const { return avx2_; }

	private:
		bool aes_;
		bool ssse3_;
		bool avx2_;
	};

}
-*/ - -/* Original code from Argon2 reference source code package used under CC0 Licence - * https://github.com/P-H-C/phc-winner-argon2 - * Copyright 2015 - * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ - -#include -#include -#include -#include -#include -#include -#include - -#include "common.hpp" -#include "dataset.hpp" -#include "virtual_memory.hpp" -#include "superscalar.hpp" -#include "blake2_generator.hpp" -#include "reciprocal.h" -#include "blake2/endian.h" -#include "argon2.h" -#include "argon2_core.h" -#include "jit_compiler.hpp" -#include "intrin_portable.h" - -static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); -static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE"); - -namespace randomx { - - template - void deallocCache(randomx_cache* cache) { - if (cache->memory != nullptr) - Allocator::freeMemory(cache->memory, CacheSize); - if (cache->jit != nullptr) - delete cache->jit; - } - - template void deallocCache(randomx_cache* cache); - template void deallocCache(randomx_cache* cache); - - void initCache(randomx_cache* cache, const void* key, size_t keySize) { - uint32_t memory_blocks, segment_length; - argon2_instance_t instance; - argon2_context context; - - context.out = nullptr; - context.outlen = 0; - context.pwd = CONST_CAST(uint8_t *)key; - context.pwdlen = (uint32_t)keySize; - context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; - context.saltlen = (uint32_t)randomx::ArgonSaltSize; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = RANDOMX_ARGON_ITERATIONS; - context.m_cost = RANDOMX_ARGON_MEMORY; - context.lanes = RANDOMX_ARGON_LANES; - context.threads = 1; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_DEFAULT_FLAGS; - context.version = ARGON2_VERSION_NUMBER; - - int inputsValid = 
randomx_argon2_validate_inputs(&context); - assert(inputsValid == ARGON2_OK); - - /* 2. Align memory size */ - /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ - memory_blocks = context.m_cost; - - segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); - - instance.version = context.version; - instance.memory = NULL; - instance.passes = context.t_cost; - instance.memory_blocks = memory_blocks; - instance.segment_length = segment_length; - instance.lane_length = segment_length * ARGON2_SYNC_POINTS; - instance.lanes = context.lanes; - instance.threads = context.threads; - instance.type = Argon2_d; - instance.memory = (block*)cache->memory; - instance.impl = cache->argonImpl; - - if (instance.threads > instance.lanes) { - instance.threads = instance.lanes; - } - - /* 3. Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - randomx_argon2_initialize(&instance, &context); - - randomx_argon2_fill_memory_blocks(&instance); - - cache->reciprocalCache.clear(); - randomx::Blake2Generator gen(key, keySize); - for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - randomx::generateSuperscalar(cache->programs[i], gen); - for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) { - auto& instr = cache->programs[i](j); - if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) { - auto rcp = randomx_reciprocal(instr.getImm32()); - instr.setImm32(cache->reciprocalCache.size()); - cache->reciprocalCache.push_back(rcp); - } - } - } - } - - void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) { - initCache(cache, key, keySize); - cache->jit->enableWriting(); - cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache); - cache->jit->generateDatasetInitCode(); - cache->jit->enableExecution(); - } - - constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL; - constexpr uint64_t 
superscalarAdd2 = 12065312585734608966ULL; - constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL; - constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; - constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; - constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; - constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; - - static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) { - constexpr uint32_t mask = CacheSize / CacheLineSize - 1; - return memory + (registerValue & mask) * CacheLineSize; - } - - void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) { - int_reg_t rl[8]; - uint8_t* mixBlock; - uint64_t registerValue = itemNumber; - rl[0] = (itemNumber + 1) * superscalarMul0; - rl[1] = rl[0] ^ superscalarAdd1; - rl[2] = rl[0] ^ superscalarAdd2; - rl[3] = rl[0] ^ superscalarAdd3; - rl[4] = rl[0] ^ superscalarAdd4; - rl[5] = rl[0] ^ superscalarAdd5; - rl[6] = rl[0] ^ superscalarAdd6; - rl[7] = rl[0] ^ superscalarAdd7; - for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - mixBlock = getMixBlock(registerValue, cache->memory); - rx_prefetch_nta(mixBlock); - SuperscalarProgram& prog = cache->programs[i]; - - executeSuperscalar(rl, prog, &cache->reciprocalCache); - - for (unsigned q = 0; q < 8; ++q) - rl[q] ^= load64_native(mixBlock + 8 * q); - - registerValue = rl[prog.getAddressRegister()]; - } - - memcpy(out, &rl, CacheLineSize); - } - - void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) { - for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize) - initDatasetItem(cache, dataset, itemNumber); - } -} diff --git a/external/src/randomx/src/dataset.hpp b/external/src/randomx/src/dataset.hpp deleted file mode 100644 index a1f0a48..0000000 --- a/external/src/randomx/src/dataset.hpp +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include "common.hpp" -#include "superscalar_program.hpp" -#include "allocator.hpp" -#include "argon2.h" - -/* Global scope for C binding */ -struct randomx_dataset { - uint8_t* memory = nullptr; - randomx::DatasetDeallocFunc* dealloc; -}; - -/* Global scope for C binding */ -struct randomx_cache { - uint8_t* memory = nullptr; - randomx::CacheDeallocFunc* dealloc; - randomx::JitCompiler* jit; - randomx::CacheInitializeFunc* initialize; - randomx::DatasetInitFunc* datasetInit; - randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; - std::vector reciprocalCache; - std::string cacheKey; - randomx_argon2_impl* argonImpl; - - bool isInitialized() { - return programs[0].getSize() != 0; - } -}; - -//A pointer to a standard-layout struct object points to its initial member -static_assert(std::is_standard_layout(), "randomx_dataset must be a standard-layout struct"); - -//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug) -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG) -//#define TO_STR(x) #x -//#define STR(x) TO_STR(x) -//#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout() is disabled for Debug configuration. JIT mode will crash." 
) -//#undef STR -//#undef TO_STR -#else -static_assert(std::is_standard_layout(), "randomx_cache must be a standard-layout struct"); -#endif - -namespace randomx { - - using DefaultAllocator = AlignedAllocator; - - template - void deallocDataset(randomx_dataset* dataset) { - if (dataset->memory != nullptr) - Allocator::freeMemory(dataset->memory, DatasetSize); - } - - template - void deallocCache(randomx_cache* cache); - - void initCache(randomx_cache*, const void*, size_t); - void initCacheCompile(randomx_cache*, const void*, size_t); - void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber); - void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); - - inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) { - if (flags & RANDOMX_FLAG_ARGON2_AVX2) { - return randomx_argon2_impl_avx2(); - } - if (flags & RANDOMX_FLAG_ARGON2_SSSE3) { - return randomx_argon2_impl_ssse3(); - } - return &randomx_argon2_fill_segment_ref; - } -} diff --git a/external/src/randomx/src/instruction.cpp b/external/src/randomx/src/instruction.cpp deleted file mode 100644 index 12e6f49..0000000 --- a/external/src/randomx/src/instruction.cpp +++ /dev/null @@ -1,390 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "instruction.hpp" -#include "common.hpp" - -namespace randomx { - - void Instruction::print(std::ostream& os) const { - os << names[opcode] << " "; - auto handler = engine[opcode]; - (this->*handler)(os); - } - - void Instruction::genAddressReg(std::ostream& os, int srcIndex) const { - os << (getModMem() ? "L1" : "L2") << "[r" << srcIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; - } - - void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const { - if (getModCond() < StoreL3Condition) - os << (getModMem() ? 
"L1" : "L2"); - else - os << "L3"; - os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; - } - - void Instruction::genAddressImm(std::ostream& os) const { - os << "L3" << "[" << (getImm32() & ScratchpadL3Mask) << "]"; - } - - void Instruction::h_IADD_RS(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - os << "r" << dstIndex << ", r" << srcIndex; - if(dstIndex == RegisterNeedsDisplacement) { - os << ", " << (int32_t)getImm32(); - } - os << ", SHFT " << getModShift() << std::endl; - } - - void Instruction::h_IADD_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_ISUB_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - else { - os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl; - } - } - - void Instruction::h_ISUB_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_IMUL_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - else { - os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl; - } - } - - void Instruction::h_IMUL_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; 
- auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_IMULH_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - - void Instruction::h_IMULH_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_ISMULH_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - - void Instruction::h_ISMULH_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_INEG_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - os << "r" << dstIndex << std::endl; - } - - void Instruction::h_IXOR_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - else { - os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl; - } - } - - void Instruction::h_IXOR_M(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", "; - 
genAddressReg(os, srcIndex); - os << std::endl; - } - else { - os << "r" << dstIndex << ", "; - genAddressImm(os); - os << std::endl; - } - } - - void Instruction::h_IROR_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - else { - os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl; - } - } - - void Instruction::h_IROL_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - if (dstIndex != srcIndex) { - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - else { - os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl; - } - } - - void Instruction::h_IMUL_RCP(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - os << "r" << dstIndex << ", " << getImm32() << std::endl; - } - - void Instruction::h_ISWAP_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - os << "r" << dstIndex << ", r" << srcIndex << std::endl; - } - - void Instruction::h_FSWAP_R(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - const char reg = (dstIndex >= RegisterCountFlt) ? 
'e' : 'f'; - dstIndex %= RegisterCountFlt; - os << reg << dstIndex << std::endl; - } - - void Instruction::h_FADD_R(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegisterCountFlt; - os << "f" << dstIndex << ", a" << srcIndex << std::endl; - } - - void Instruction::h_FADD_M(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegistersCount; - os << "f" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - - void Instruction::h_FSUB_R(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegisterCountFlt; - os << "f" << dstIndex << ", a" << srcIndex << std::endl; - } - - void Instruction::h_FSUB_M(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegistersCount; - os << "f" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - - void Instruction::h_FSCAL_R(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - os << "f" << dstIndex << std::endl; - } - - void Instruction::h_FMUL_R(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegisterCountFlt; - os << "e" << dstIndex << ", a" << srcIndex << std::endl; - } - - void Instruction::h_FDIV_M(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - auto srcIndex = src % RegistersCount; - os << "e" << dstIndex << ", "; - genAddressReg(os, srcIndex); - os << std::endl; - } - - void Instruction::h_FSQRT_R(std::ostream& os) const { - auto dstIndex = dst % RegisterCountFlt; - os << "e" << dstIndex << std::endl; - } - - void Instruction::h_CFROUND(std::ostream& os) const { - auto srcIndex = src % RegistersCount; - os << "r" << srcIndex << ", " << (getImm32() & 63) << std::endl; - } - - void Instruction::h_CBRANCH(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - os << "r" << dstIndex << ", " 
<< (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl; - } - - void Instruction::h_ISTORE(std::ostream& os) const { - auto dstIndex = dst % RegistersCount; - auto srcIndex = src % RegistersCount; - genAddressRegDst(os, dstIndex); - os << ", r" << srcIndex << std::endl; - } - - void Instruction::h_NOP(std::ostream& os) const { - os << std::endl; - } - -#include "instruction_weights.hpp" -#define INST_NAME(x) REPN(#x, WT(x)) -#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x)) - - const char* Instruction::names[256] = { - INST_NAME(IADD_RS) - INST_NAME(IADD_M) - INST_NAME(ISUB_R) - INST_NAME(ISUB_M) - INST_NAME(IMUL_R) - INST_NAME(IMUL_M) - INST_NAME(IMULH_R) - INST_NAME(IMULH_M) - INST_NAME(ISMULH_R) - INST_NAME(ISMULH_M) - INST_NAME(IMUL_RCP) - INST_NAME(INEG_R) - INST_NAME(IXOR_R) - INST_NAME(IXOR_M) - INST_NAME(IROR_R) - INST_NAME(IROL_R) - INST_NAME(ISWAP_R) - INST_NAME(FSWAP_R) - INST_NAME(FADD_R) - INST_NAME(FADD_M) - INST_NAME(FSUB_R) - INST_NAME(FSUB_M) - INST_NAME(FSCAL_R) - INST_NAME(FMUL_R) - INST_NAME(FDIV_M) - INST_NAME(FSQRT_R) - INST_NAME(CBRANCH) - INST_NAME(CFROUND) - INST_NAME(ISTORE) - INST_NAME(NOP) - }; - - InstructionFormatter Instruction::engine[256] = { - INST_HANDLE(IADD_RS) - INST_HANDLE(IADD_M) - INST_HANDLE(ISUB_R) - INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_R) - INST_HANDLE(IMUL_M) - INST_HANDLE(IMULH_R) - INST_HANDLE(IMULH_M) - INST_HANDLE(ISMULH_R) - INST_HANDLE(ISMULH_M) - INST_HANDLE(IMUL_RCP) - INST_HANDLE(INEG_R) - INST_HANDLE(IXOR_R) - INST_HANDLE(IXOR_M) - INST_HANDLE(IROR_R) - INST_HANDLE(IROL_R) - INST_HANDLE(ISWAP_R) - INST_HANDLE(FSWAP_R) - INST_HANDLE(FADD_R) - INST_HANDLE(FADD_M) - INST_HANDLE(FSUB_R) - INST_HANDLE(FSUB_M) - INST_HANDLE(FSCAL_R) - INST_HANDLE(FMUL_R) - INST_HANDLE(FDIV_M) - INST_HANDLE(FSQRT_R) - INST_HANDLE(CBRANCH) - INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(NOP) - }; - -} \ No newline at end of file diff --git a/external/src/randomx/src/instruction.hpp 
b/external/src/randomx/src/instruction.hpp deleted file mode 100644 index b1863b5..0000000 --- a/external/src/randomx/src/instruction.hpp +++ /dev/null @@ -1,149 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include "blake2/endian.h" - -namespace randomx { - - class Instruction; - - typedef void(Instruction::*InstructionFormatter)(std::ostream&) const; - - enum class InstructionType : uint16_t { - IADD_RS = 0, - IADD_M = 1, - ISUB_R = 2, - ISUB_M = 3, - IMUL_R = 4, - IMUL_M = 5, - IMULH_R = 6, - IMULH_M = 7, - ISMULH_R = 8, - ISMULH_M = 9, - IMUL_RCP = 10, - INEG_R = 11, - IXOR_R = 12, - IXOR_M = 13, - IROR_R = 14, - IROL_R = 15, - ISWAP_R = 16, - FSWAP_R = 17, - FADD_R = 18, - FADD_M = 19, - FSUB_R = 20, - FSUB_M = 21, - FSCAL_R = 22, - FMUL_R = 23, - FDIV_M = 24, - FSQRT_R = 25, - CBRANCH = 26, - CFROUND = 27, - ISTORE = 28, - NOP = 29, - }; - - class Instruction { - public: - uint32_t getImm32() const { - return load32(&imm32); - } - void setImm32(uint32_t val) { - return store32(&imm32, val); - } - const char* getName() const { - return names[opcode]; - } - friend std::ostream& operator<<(std::ostream& os, const Instruction& i) { - i.print(os); - return os; - } - int getModMem() const { - return mod % 4; //bits 0-1 - } - int getModShift() const { - return (mod >> 2) % 4; //bits 2-3 - } - int getModCond() const { - return mod >> 4; //bits 4-7 - } - void setMod(uint8_t val) { - mod = val; - } - - uint8_t opcode; - uint8_t dst; - uint8_t src; - uint8_t mod; - uint32_t imm32; - private: - void print(std::ostream&) const; - static const char* names[256]; - static InstructionFormatter engine[256]; - void genAddressReg(std::ostream& os, int) const; - void genAddressImm(std::ostream& os) const; - void genAddressRegDst(std::ostream&, int) const; - void h_IADD_RS(std::ostream&) const; - void h_IADD_M(std::ostream&) const; - void h_ISUB_R(std::ostream&) const; - void h_ISUB_M(std::ostream&) const; - void h_IMUL_R(std::ostream&) const; - void h_IMUL_M(std::ostream&) const; - void h_IMULH_R(std::ostream&) const; - void h_IMULH_M(std::ostream&) const; - void h_ISMULH_R(std::ostream&) const; - void h_ISMULH_M(std::ostream&) 
const; - void h_IMUL_RCP(std::ostream&) const; - void h_INEG_R(std::ostream&) const; - void h_IXOR_R(std::ostream&) const; - void h_IXOR_M(std::ostream&) const; - void h_IROR_R(std::ostream&) const; - void h_IROL_R(std::ostream&) const; - void h_ISWAP_R(std::ostream&) const; - void h_FSWAP_R(std::ostream&) const; - void h_FADD_R(std::ostream&) const; - void h_FADD_M(std::ostream&) const; - void h_FSUB_R(std::ostream&) const; - void h_FSUB_M(std::ostream&) const; - void h_FSCAL_R(std::ostream&) const; - void h_FMUL_R(std::ostream&) const; - void h_FDIV_M(std::ostream&) const; - void h_FSQRT_R(std::ostream&) const; - void h_CBRANCH(std::ostream&) const; - void h_CFROUND(std::ostream&) const; - void h_ISTORE(std::ostream&) const; - void h_NOP(std::ostream&) const; - }; - - static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction"); - static_assert(std::is_standard_layout(), "randomx::Instruction must be a standard-layout struct"); -} \ No newline at end of file diff --git a/external/src/randomx/src/instruction_weights.hpp b/external/src/randomx/src/instruction_weights.hpp deleted file mode 100644 index f6c8873..0000000 --- a/external/src/randomx/src/instruction_weights.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#define REP0(x) -#define REP1(x) x, -#define REP2(x) REP1(x) x, -#define REP3(x) REP2(x) x, -#define REP4(x) REP3(x) x, -#define REP5(x) REP4(x) x, -#define REP6(x) REP5(x) x, -#define REP7(x) REP6(x) x, -#define REP8(x) REP7(x) x, -#define REP9(x) REP8(x) x, -#define REP10(x) REP9(x) x, -#define REP11(x) REP10(x) x, -#define REP12(x) REP11(x) x, -#define REP13(x) REP12(x) x, -#define REP14(x) REP13(x) x, -#define REP15(x) REP14(x) x, -#define REP16(x) REP15(x) x, -#define REP17(x) REP16(x) x, -#define REP18(x) REP17(x) x, -#define REP19(x) REP18(x) x, -#define REP20(x) REP19(x) x, -#define REP21(x) REP20(x) x, -#define REP22(x) REP21(x) x, -#define REP23(x) REP22(x) x, -#define REP24(x) REP23(x) x, -#define REP25(x) REP24(x) x, -#define REP26(x) REP25(x) x, -#define REP27(x) REP26(x) x, -#define REP28(x) REP27(x) x, -#define REP29(x) REP28(x) x, -#define REP30(x) REP29(x) x, -#define REP31(x) REP30(x) x, -#define REP32(x) REP31(x) x, -#define REP33(x) REP32(x) x, -#define REP40(x) REP32(x) REP8(x) -#define REP64(x) REP32(x) REP32(x) -#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) -#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) -#define REP256(x) REP128(x) REP128(x) -#define 
REPNX(x,N) REP##N(x) -#define REPN(x,N) REPNX(x,N) -#define NUM(x) x -#define WT(x) NUM(RANDOMX_FREQ_##x) diff --git a/external/src/randomx/src/instructions_portable.cpp b/external/src/randomx/src/instructions_portable.cpp deleted file mode 100644 index d746727..0000000 --- a/external/src/randomx/src/instructions_portable.cpp +++ /dev/null @@ -1,208 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include -#include -#include "common.hpp" -#include "intrin_portable.h" -#include "blake2/endian.h" - -#if defined(__SIZEOF_INT128__) - typedef unsigned __int128 uint128_t; - typedef __int128 int128_t; - uint64_t mulh(uint64_t a, uint64_t b) { - return ((uint128_t)a * b) >> 64; - } - int64_t smulh(int64_t a, int64_t b) { - return ((int128_t)a * b) >> 64; - } - #define HAVE_MULH - #define HAVE_SMULH -#endif - -#if defined(_MSC_VER) - #define HAS_VALUE(X) X ## 0 - #define EVAL_DEFINE(X) HAS_VALUE(X) - #include - #include - - uint64_t rotl(uint64_t x, unsigned int c) { - return _rotl64(x, c); - } - uint64_t rotr(uint64_t x, unsigned int c) { - return _rotr64(x, c); - } - #define HAVE_ROTL - #define HAVE_ROTR - - #if EVAL_DEFINE(__MACHINEARM64_X64(1)) - uint64_t mulh(uint64_t a, uint64_t b) { - return __umulh(a, b); - } - #define HAVE_MULH - #endif - - #if EVAL_DEFINE(__MACHINEX64(1)) - int64_t smulh(int64_t a, int64_t b) { - int64_t hi; - _mul128(a, b, &hi); - return hi; - } - #define HAVE_SMULH - #endif - - static void setRoundMode_(uint32_t mode) { - _controlfp(mode, _MCW_RC); - } - #define HAVE_SETROUNDMODE_IMPL -#endif - -#ifndef HAVE_SETROUNDMODE_IMPL - static void setRoundMode_(uint32_t mode) { - fesetround(mode); - } -#endif - -#ifndef HAVE_ROTR - uint64_t rotr(uint64_t a, unsigned int b) { - return (a >> b) | (a << (-b & 63)); - } - #define HAVE_ROTR -#endif - -#ifndef HAVE_ROTL - uint64_t rotl(uint64_t a, unsigned int b) { - return (a << b) | (a >> (-b & 63)); - } - #define HAVE_ROTL -#endif - -#ifndef HAVE_MULH - #define LO(x) ((x)&0xffffffff) - #define HI(x) ((x)>>32) - uint64_t mulh(uint64_t a, uint64_t b) { - uint64_t ah = HI(a), al = LO(a); - uint64_t bh = HI(b), bl = LO(b); - uint64_t x00 = al * bl; - uint64_t x01 = al * bh; - uint64_t x10 = ah * bl; - uint64_t x11 = ah * bh; - uint64_t m1 = LO(x10) + LO(x01) + HI(x00); - uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1); - uint64_t m3 = HI(x11) + HI(m2); - - return (m3 << 32) + LO(m2); - } - 
#define HAVE_MULH -#endif - -#ifndef HAVE_SMULH - int64_t smulh(int64_t a, int64_t b) { - int64_t hi = mulh(a, b); - if (a < 0LL) hi -= b; - if (b < 0LL) hi -= a; - return hi; - } - #define HAVE_SMULH -#endif - -#ifdef RANDOMX_DEFAULT_FENV - -void rx_reset_float_state() { - setRoundMode_(FE_TONEAREST); - rx_set_double_precision(); //set precision to 53 bits if needed by the platform -} - -void rx_set_rounding_mode(uint32_t mode) { - switch (mode & 3) { - case RoundDown: - setRoundMode_(FE_DOWNWARD); - break; - case RoundUp: - setRoundMode_(FE_UPWARD); - break; - case RoundToZero: - setRoundMode_(FE_TOWARDZERO); - break; - case RoundToNearest: - setRoundMode_(FE_TONEAREST); - break; - default: - UNREACHABLE; - } -} - -uint32_t rx_get_rounding_mode() { - switch (fegetround()) { - case FE_DOWNWARD: - return RoundDown; - case FE_UPWARD: - return RoundUp; - case FE_TOWARDZERO: - return RoundToZero; - case FE_TONEAREST: - return RoundToNearest; - default: - UNREACHABLE; - } -} - -#endif - -#ifdef RANDOMX_USE_X87 - -#if defined(_MSC_VER) && defined(_M_IX86) - -void rx_set_double_precision() { - _control87(_PC_53, _MCW_PC); -} - -#elif defined(__i386) - -void rx_set_double_precision() { - uint16_t volatile x87cw; - asm volatile("fstcw %0" : "=m" (x87cw)); - x87cw &= ~0x300; - x87cw |= 0x200; - asm volatile("fldcw %0" : : "m" (x87cw)); -} - -#endif - -#endif //RANDOMX_USE_X87 - -union double_ser_t { - double f; - uint64_t i; -}; - -double loadDoublePortable(const void* addr) { - double_ser_t ds; - ds.i = load64(addr); - return ds.f; -} diff --git a/external/src/randomx/src/intrin_portable.h b/external/src/randomx/src/intrin_portable.h deleted file mode 100644 index 5e42172..0000000 --- a/external/src/randomx/src/intrin_portable.h +++ /dev/null @@ -1,751 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include "blake2/endian.h" - -constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) { - return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x); -} - -constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) { - return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x); -} - -constexpr uint64_t signExtend2sCompl(uint32_t x) { - return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? 
(x | 0xffffffff00000000ULL) : (uint64_t)x); -} - -constexpr int RoundToNearest = 0; -constexpr int RoundDown = 1; -constexpr int RoundUp = 2; -constexpr int RoundToZero = 3; - -//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available -#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)) -#define __SSE2__ 1 -#endif - -//MSVC doesn't define __AES__ -#if defined(_MSC_VER) && defined(__SSE2__) -#define __AES__ -#endif - -//the library "sqrt" function provided by MSVC for x86 targets doesn't give -//the correct results, so we have to use inline assembly to call x87 fsqrt directly -#if !defined(__SSE2__) -#if defined(_MSC_VER) && defined(_M_IX86) -inline double __cdecl rx_sqrt(double x) { - __asm { - fld x - fsqrt - } -} -#define rx_sqrt rx_sqrt - -void rx_set_double_precision(); -#define RANDOMX_USE_X87 - -#elif defined(__i386) - -void rx_set_double_precision(); -#define RANDOMX_USE_X87 - -#endif -#endif //__SSE2__ - -#if !defined(rx_sqrt) -#define rx_sqrt sqrt -#endif - -#if !defined(RANDOMX_USE_X87) -#define rx_set_double_precision(x) -#endif - -#ifdef __SSE2__ -#ifdef __GNUC__ -#include -#else -#include -#endif - -typedef __m128i rx_vec_i128; -typedef __m128d rx_vec_f128; - -#define rx_aligned_alloc(a, b) _mm_malloc(a,b) -#define rx_aligned_free(a) _mm_free(a) -#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA) -#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0) - -#define rx_load_vec_f128 _mm_load_pd -#define rx_store_vec_f128 _mm_store_pd -#define rx_add_vec_f128 _mm_add_pd -#define rx_sub_vec_f128 _mm_sub_pd -#define rx_mul_vec_f128 _mm_mul_pd -#define rx_div_vec_f128 _mm_div_pd -#define rx_sqrt_vec_f128 _mm_sqrt_pd - -FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - return _mm_shuffle_pd(a, a, 1); -} - -FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { - return _mm_castsi128_pd(_mm_set_epi64x(x1, x0)); -} - -FORCE_INLINE 
rx_vec_f128 rx_set1_vec_f128(uint64_t x) { - return _mm_castsi128_pd(_mm_set1_epi64x(x)); -} - -#define rx_xor_vec_f128 _mm_xor_pd -#define rx_and_vec_f128 _mm_and_pd -#define rx_or_vec_f128 _mm_or_pd - -#ifdef __AES__ - -#define rx_aesenc_vec_i128 _mm_aesenc_si128 -#define rx_aesdec_vec_i128 _mm_aesdec_si128 - -#define HAVE_AES 1 - -#endif //__AES__ - -FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { - return _mm_cvtsi128_si32(a); -} - -FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { - return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55)); -} - -FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { - return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa)); -} - -FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { - return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff)); -} - -#define rx_set_int_vec_i128 _mm_set_epi32 -#define rx_xor_vec_i128 _mm_xor_si128 -#define rx_load_vec_i128 _mm_load_si128 -#define rx_store_vec_i128 _mm_store_si128 - -FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { - __m128i ix = _mm_loadl_epi64((const __m128i*)addr); - return _mm_cvtepi32_pd(ix); -} - -constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled - -FORCE_INLINE void rx_reset_float_state() { - _mm_setcsr(rx_mxcsr_default); -} - -FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) { - _mm_setcsr(rx_mxcsr_default | (mode << 13)); -} - -FORCE_INLINE uint32_t rx_get_rounding_mode() { - return (_mm_getcsr() >> 13) & 3; -} - -#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. 
Earlier processors cant use doubles or 64 bit integers with SIMD -#include -#include -#include -#include -#undef vector -#undef pixel -#undef bool - -typedef __vector uint8_t __m128i; -typedef __vector uint32_t __m128l; -typedef __vector int __m128li; -typedef __vector uint64_t __m128ll; -typedef __vector double __m128d; - -typedef __m128i rx_vec_i128; -typedef __m128d rx_vec_f128; -typedef union{ - rx_vec_i128 i; - rx_vec_f128 d; - uint64_t u64[2]; - double d64[2]; - uint32_t u32[4]; - int i32[4]; -} vec_u; - -#define rx_aligned_alloc(a, b) malloc(a) -#define rx_aligned_free(a) free(a) -#define rx_prefetch_nta(x) -#define rx_prefetch_t0(x) - -/* Splat 64-bit long long to 2 64-bit long longs */ -FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) -{ return (__m128i) vec_splats (scalar); } - -FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { -#if defined(NATIVE_LITTLE_ENDIAN) - return (rx_vec_f128)vec_vsx_ld(0,pd); -#else - vec_u t; - t.u64[0] = load64(pd + 0); - t.u64[1] = load64(pd + 1); - return (rx_vec_f128)t.d; -#endif -} - -FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) { -#if defined(NATIVE_LITTLE_ENDIAN) - vec_vsx_st(a,0,(rx_vec_f128*)mem_addr); -#else - vec_u _a; - _a.d = a; - store64(mem_addr + 0, _a.u64[0]); - store64(mem_addr + 1, _a.u64[1]); -#endif -} - -FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7}); -} - -FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_add(a,b); -} - -FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_sub(a,b); -} - -FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_mul(a,b); -} - -FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_div(a,b); -} - -FORCE_INLINE rx_vec_f128 
rx_sqrt_vec_f128(rx_vec_f128 a) { - return (rx_vec_f128)vec_sqrt(a); -} - -FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) { - return (rx_vec_i128)vec_splat2sd(a); -} - -FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) { - return (rx_vec_f128)a; -} - -FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { - return (rx_vec_f128)(__m128ll){x0,x1}; -} - -FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { - return (rx_vec_f128)vec_splat2sd(x); -} - -FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_xor(a,b); -} - -FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_and(a,b); -} - -FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return (rx_vec_f128)vec_or(a,b); -} - -#if defined(__CRYPTO__) - -FORCE_INLINE __m128ll vrev(__m128i v){ -#if defined(NATIVE_LITTLE_ENDIAN) - return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}); -#else - return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}); -#endif -} - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - __m128ll _v = vrev(v); - __m128ll _rkey = vrev(rkey); - __m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey)); - return (rx_vec_i128)result; -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { - __m128ll _v = vrev(v); - __m128ll zero = (__m128ll){0}; - __m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero)); - return (rx_vec_i128)vec_xor((__m128i)out,rkey); -} -#define HAVE_AES 1 - -#endif //__CRYPTO__ - -FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { - vec_u _a; - _a.i = a; - return _a.i32[0]; -} - -FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { - vec_u _a; - _a.i = a; - return _a.i32[1]; -} - -FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { - vec_u _a; - _a.i = a; - return _a.i32[2]; 
-} - -FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { - vec_u _a; - _a.i = a; - return _a.i32[3]; -} - -FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { - return (rx_vec_i128)((__m128li){_I0,_I1,_I2,_I3}); -}; - -FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) { - return (rx_vec_i128)vec_xor(_A,_B); -} - -FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *_P) { -#if defined(NATIVE_LITTLE_ENDIAN) - return *_P; -#else - uint32_t* ptr = (uint32_t*)_P; - vec_u c; - c.u32[0] = load32(ptr + 0); - c.u32[1] = load32(ptr + 1); - c.u32[2] = load32(ptr + 2); - c.u32[3] = load32(ptr + 3); - return (rx_vec_i128)c.i; -#endif -} - -FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) { -#if defined(NATIVE_LITTLE_ENDIAN) - *_P = _B; -#else - uint32_t* ptr = (uint32_t*)_P; - vec_u B; - B.i = _B; - store32(ptr + 0, B.u32[0]); - store32(ptr + 1, B.u32[1]); - store32(ptr + 2, B.u32[2]); - store32(ptr + 3, B.u32[3]); -#endif -} - -FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { - vec_u x; - x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); - x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); - return (rx_vec_f128)x.d; -} - -#define RANDOMX_DEFAULT_FENV - -#elif defined(__aarch64__) - -#include -#include -#include - -typedef uint8x16_t rx_vec_i128; -typedef float64x2_t rx_vec_f128; - -inline void* rx_aligned_alloc(size_t size, size_t align) { - void* p; - if (posix_memalign(&p, align, size) == 0) - return p; - - return 0; -}; - -#define rx_aligned_free(a) free(a) - -inline void rx_prefetch_nta(void* ptr) { - asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); -} - -inline void rx_prefetch_t0(const void* ptr) { - asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); -} - -FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { - return vld1q_f64((const float64_t*)pd); -} - -FORCE_INLINE void rx_store_vec_f128(double* 
mem_addr, rx_vec_f128 val) { - vst1q_f64((float64_t*)mem_addr, val); -} - -FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - float64x2_t temp; - temp = vcopyq_laneq_f64(temp, 1, a, 1); - a = vcopyq_laneq_f64(a, 1, a, 0); - return vcopyq_laneq_f64(a, 0, temp, 1); -} - -FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { - uint64x2_t temp0 = vdupq_n_u64(x0); - uint64x2_t temp1 = vdupq_n_u64(x1); - return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0)); -} - -FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { - return vreinterpretq_f64_u64(vdupq_n_u64(x)); -} - -#define rx_add_vec_f128 vaddq_f64 -#define rx_sub_vec_f128 vsubq_f64 -#define rx_mul_vec_f128 vmulq_f64 -#define rx_div_vec_f128 vdivq_f64 -#define rx_sqrt_vec_f128 vsqrtq_f64 - -FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -#ifdef __ARM_FEATURE_CRYPTO - - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { - const uint8x16_t zero = { 0 }; - return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key; -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { - const uint8x16_t zero = { 0 }; - return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key; -} - -#define HAVE_AES 1 - -#endif - -#define rx_xor_vec_i128 veorq_u8 - -FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0); -} - -FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1); -} - -FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { - return 
vgetq_lane_s32(vreinterpretq_s32_u8(a), 2); -} - -FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3); -} - -FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { - int32_t data[4]; - data[0] = _I0; - data[1] = _I1; - data[2] = _I2; - data[3] = _I3; - return vreinterpretq_u8_s32(vld1q_s32(data)); -}; - -#define rx_xor_vec_i128 veorq_u8 - -FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) { - return vld1q_u8((const uint8_t*)mem_addr); -} - -FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { - vst1q_u8((uint8_t*)mem_addr, val); -} - -FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { - double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); - double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); - rx_vec_f128 x; - x = vsetq_lane_f64(lo, x, 0); - x = vsetq_lane_f64(hi, x, 1); - return x; -} - -#define RANDOMX_DEFAULT_FENV - -#else //portable fallback - -#include -#include -#include -#include - -typedef union { - uint64_t u64[2]; - uint32_t u32[4]; - uint16_t u16[8]; - uint8_t u8[16]; -} rx_vec_i128; - -typedef union { - struct { - double lo; - double hi; - }; - rx_vec_i128 i; -} rx_vec_f128; - -#define rx_aligned_alloc(a, b) malloc(a) -#define rx_aligned_free(a) free(a) -#define rx_prefetch_nta(x) -#define rx_prefetch_t0(x) - -FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { - rx_vec_f128 x; - x.i.u64[0] = load64(pd + 0); - x.i.u64[1] = load64(pd + 1); - return x; -} - -FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) { - store64(mem_addr + 0, a.i.u64[0]); - store64(mem_addr + 1, a.i.u64[1]); -} - -FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - double temp = a.hi; - a.hi = a.lo; - a.lo = temp; - return a; -} - -FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.lo = a.lo + b.lo; - x.hi = a.hi + b.hi; - return x; 
-} - -FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.lo = a.lo - b.lo; - x.hi = a.hi - b.hi; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.lo = a.lo * b.lo; - x.hi = a.hi * b.hi; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.lo = a.lo / b.lo; - x.hi = a.hi / b.hi; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) { - rx_vec_f128 x; - x.lo = rx_sqrt(a.lo); - x.hi = rx_sqrt(a.hi); - return x; -} - -FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) { - rx_vec_i128 x; - x.u64[0] = a; - x.u64[1] = a; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) { - rx_vec_f128 x; - x.i = a; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { - rx_vec_f128 v; - v.i.u64[0] = x0; - v.i.u64[1] = x1; - return v; -} - -FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { - rx_vec_f128 v; - v.i.u64[0] = x; - v.i.u64[1] = x; - return v; -} - -FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0]; - x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1]; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.i.u64[0] = a.i.u64[0] & b.i.u64[0]; - x.i.u64[1] = a.i.u64[1] & b.i.u64[1]; - return x; -} - -FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - rx_vec_f128 x; - x.i.u64[0] = a.i.u64[0] | b.i.u64[0]; - x.i.u64[1] = a.i.u64[1] | b.i.u64[1]; - return x; -} - -FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { - return a.u32[0]; -} - -FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { - return a.u32[1]; -} - -FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { - return a.u32[2]; -} - -FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { - return a.u32[3]; -} - -FORCE_INLINE rx_vec_i128 
rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { - rx_vec_i128 v; - v.u32[0] = _I0; - v.u32[1] = _I1; - v.u32[2] = _I2; - v.u32[3] = _I3; - return v; -}; - -FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) { - rx_vec_i128 c; - c.u32[0] = _A.u32[0] ^ _B.u32[0]; - c.u32[1] = _A.u32[1] ^ _B.u32[1]; - c.u32[2] = _A.u32[2] ^ _B.u32[2]; - c.u32[3] = _A.u32[3] ^ _B.u32[3]; - return c; -} - -FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) { -#if defined(NATIVE_LITTLE_ENDIAN) - return *_P; -#else - uint32_t* ptr = (uint32_t*)_P; - rx_vec_i128 c; - c.u32[0] = load32(ptr + 0); - c.u32[1] = load32(ptr + 1); - c.u32[2] = load32(ptr + 2); - c.u32[3] = load32(ptr + 3); - return c; -#endif -} - -FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) { -#if defined(NATIVE_LITTLE_ENDIAN) - *_P = _B; -#else - uint32_t* ptr = (uint32_t*)_P; - store32(ptr + 0, _B.u32[0]); - store32(ptr + 1, _B.u32[1]); - store32(ptr + 2, _B.u32[2]); - store32(ptr + 3, _B.u32[3]); -#endif -} - -FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { - rx_vec_f128 x; - x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); - x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); - return x; -} - -#define RANDOMX_DEFAULT_FENV - -#endif - -#ifndef HAVE_AES -static const char* platformError = "Platform doesn't support hardware AES"; - -#include - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128, rx_vec_i128) { - throw std::runtime_error(platformError); -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128, rx_vec_i128) { - throw std::runtime_error(platformError); -} - -#define HAVE_AES 0 - -#endif - -#ifdef RANDOMX_DEFAULT_FENV - -void rx_reset_float_state(); - -void rx_set_rounding_mode(uint32_t mode); - -uint32_t rx_get_rounding_mode(); - -#endif - -double loadDoublePortable(const void* addr); -uint64_t mulh(uint64_t, uint64_t); -int64_t smulh(int64_t, int64_t); -uint64_t 
rotl(uint64_t, unsigned int); -uint64_t rotr(uint64_t, unsigned int); diff --git a/external/src/randomx/src/jit_compiler.hpp b/external/src/randomx/src/jit_compiler.hpp deleted file mode 100644 index 17fdad4..0000000 --- a/external/src/randomx/src/jit_compiler.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#if defined(_M_X64) || defined(__x86_64__) -#include "jit_compiler_x86.hpp" -#elif defined(__aarch64__) -#include "jit_compiler_a64.hpp" -#else -#include "jit_compiler_fallback.hpp" -#endif - -#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__)) -#define RANDOMX_FORCE_SECURE -#endif diff --git a/external/src/randomx/src/jit_compiler_a64.cpp b/external/src/randomx/src/jit_compiler_a64.cpp deleted file mode 100644 index e45774e..0000000 --- a/external/src/randomx/src/jit_compiler_a64.cpp +++ /dev/null @@ -1,1072 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador -Copyright (c) 2019, SChernykh - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "jit_compiler_a64.hpp" -#include "superscalar.hpp" -#include "program.hpp" -#include "reciprocal.h" -#include "virtual_memory.hpp" - -namespace ARMV8A { - -constexpr uint32_t B = 0x14000000; -constexpr uint32_t EOR = 0xCA000000; -constexpr uint32_t EOR32 = 0x4A000000; -constexpr uint32_t ADD = 0x8B000000; -constexpr uint32_t SUB = 0xCB000000; -constexpr uint32_t MUL = 0x9B007C00; -constexpr uint32_t UMULH = 0x9BC07C00; -constexpr uint32_t SMULH = 0x9B407C00; -constexpr uint32_t MOVZ = 0xD2800000; -constexpr uint32_t MOVN = 0x92800000; -constexpr uint32_t MOVK = 0xF2800000; -constexpr uint32_t ADD_IMM_LO = 0x91000000; -constexpr uint32_t ADD_IMM_HI = 0x91400000; -constexpr uint32_t LDR_LITERAL = 0x58000000; -constexpr uint32_t ROR = 0x9AC02C00; -constexpr uint32_t ROR_IMM = 0x93C00000; -constexpr uint32_t MOV_REG = 0xAA0003E0; -constexpr uint32_t MOV_VREG_EL = 0x6E080400; -constexpr uint32_t FADD = 0x4E60D400; -constexpr uint32_t FSUB = 0x4EE0D400; -constexpr uint32_t FEOR = 0x6E201C00; -constexpr uint32_t FMUL = 0x6E60DC00; -constexpr uint32_t FDIV = 0x6E60FC00; -constexpr uint32_t FSQRT = 0x6EE1F800; - -} - -namespace randomx { - -static const size_t CodeSize = ((uint8_t*)randomx_init_dataset_aarch64_end) - ((uint8_t*)randomx_program_aarch64); -static const size_t MainLoopBegin = ((uint8_t*)randomx_program_aarch64_main_loop) - ((uint8_t*)randomx_program_aarch64); -static const size_t PrologueSize = 
((uint8_t*)randomx_program_aarch64_vm_instructions) - ((uint8_t*)randomx_program_aarch64); -static const size_t ImulRcpLiteralsEnd = ((uint8_t*)randomx_program_aarch64_imul_rcp_literals_end) - ((uint8_t*)randomx_program_aarch64); - -static const size_t CalcDatasetItemSize = - // Prologue - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + - // Main loop - RANDOMX_CACHE_ACCESSES * ( - // Main loop prologue - ((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 + - // Inner main loop (instructions) - ((RANDOMX_SUPERSCALAR_LATENCY * 3) + 2) * 16 + - // Main loop epilogue - ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4 - ) + - // Epilogue - ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result); - -constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; - -template static constexpr size_t Log2(T value) { return (value > 1) ? 
(Log2(value / 2) + 1) : 0; } - -JitCompilerA64::JitCompilerA64() - : code((uint8_t*) allocMemoryPages(CodeSize + CalcDatasetItemSize)) - , literalPos(ImulRcpLiteralsEnd) - , num32bitLiterals(0) -{ - memset(reg_changed_offset, 0, sizeof(reg_changed_offset)); - memcpy(code, (void*) randomx_program_aarch64, CodeSize); - -#ifdef __GNUC__ - __builtin___clear_cache(reinterpret_cast(code), reinterpret_cast(code + CodeSize)); -#endif -} - -JitCompilerA64::~JitCompilerA64() -{ - freePagedMemory(code, CodeSize + CalcDatasetItemSize); -} - -void JitCompilerA64::enableWriting() -{ - setPagesRW(code, CodeSize + CalcDatasetItemSize); -} - -void JitCompilerA64::enableExecution() -{ - setPagesRX(code, CodeSize + CalcDatasetItemSize); -} - -void JitCompilerA64::enableAll() -{ - setPagesRWX(code, CodeSize + CalcDatasetItemSize); -} - -void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) -{ - uint32_t codePos = MainLoopBegin + 4; - - // and w16, w10, ScratchpadL3Mask64 - emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - - // and w17, w18, ScratchpadL3Mask64 - emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - - codePos = PrologueSize; - literalPos = ImulRcpLiteralsEnd; - num32bitLiterals = 0; - - for (uint32_t i = 0; i < RegistersCount; ++i) - reg_changed_offset[i] = codePos; - - for (uint32_t i = 0; i < program.getSize(); ++i) - { - Instruction& instr = program(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; - (this->*engine[instr.opcode])(instr, codePos); - } - - // Update spMix2 - // eor w18, config.readReg2, config.readReg3 - emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); - - // Jump back to the main loop - const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; - emit32(ARMV8A::B | (offset / 4), 
code, codePos); - - // and w18, w18, CacheLineAlignMask - codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); - - // and w10, w10, CacheLineAlignMask - codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 10 | (10 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); - - // Update spMix1 - // eor x10, config.readReg0, config.readReg1 - codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64); - emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos); - -#ifdef __GNUC__ - __builtin___clear_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); -#endif -} - -void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset) -{ - uint32_t codePos = MainLoopBegin + 4; - - // and w16, w10, ScratchpadL3Mask64 - emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - - // and w17, w18, ScratchpadL3Mask64 - emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - - codePos = PrologueSize; - literalPos = ImulRcpLiteralsEnd; - num32bitLiterals = 0; - - for (uint32_t i = 0; i < RegistersCount; ++i) - reg_changed_offset[i] = codePos; - - for (uint32_t i = 0; i < program.getSize(); ++i) - { - Instruction& instr = program(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; - (this->*engine[instr.opcode])(instr, codePos); - } - - // Update spMix2 - // eor w18, config.readReg2, config.readReg3 - emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); - - // Jump back to the main loop - const uint32_t offset = 
(((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; - emit32(ARMV8A::B | (offset / 4), code, codePos); - - // and w2, w9, CacheLineAlignMask - codePos = (((uint8_t*)randomx_program_aarch64_light_cacheline_align_mask) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 2 | (9 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); - - // Update spMix1 - // eor x10, config.readReg0, config.readReg1 - codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64); - emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos); - - // Apply dataset offset - codePos = ((uint8_t*)randomx_program_aarch64_light_dataset_offset) - ((uint8_t*)randomx_program_aarch64); - - datasetOffset /= CacheLineSize; - const uint32_t imm_lo = datasetOffset & ((1 << 12) - 1); - const uint32_t imm_hi = datasetOffset >> 12; - - emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos); - emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos); - -#ifdef __GNUC__ - __builtin___clear_cache(reinterpret_cast(code + MainLoopBegin), reinterpret_cast(code + codePos)); -#endif -} - -template -void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &reciprocalCache) -{ - uint32_t codePos = CodeSize; - - uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64; - uint8_t* p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_prefetch; - memcpy(code + codePos, p1, p2 - p1); - codePos += p2 - p1; - - num32bitLiterals = 64; - constexpr uint32_t tmp_reg = 12; - - for (size_t i = 0; i < N; ++i) - { - // and x11, x10, CacheSize / CacheLineSize - 1 - emit32(0x92400000 | 11 | (10 << 5) | ((Log2(CacheSize / CacheLineSize) - 1) << 10), code, codePos); - - p1 = ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch) + 4; - p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix; - 
memcpy(code + codePos, p1, p2 - p1); - codePos += p2 - p1; - - SuperscalarProgram& prog = programs[i]; - const size_t progSize = prog.getSize(); - - uint32_t jmp_pos = codePos; - codePos += 4; - - // Fill in literal pool - for (size_t j = 0; j < progSize; ++j) - { - const Instruction& instr = prog(j); - if (static_cast(instr.opcode) == randomx::SuperscalarInstructionType::IMUL_RCP) - emit64(reciprocalCache[instr.getImm32()], code, codePos); - } - - // Jump over literal pool - uint32_t literal_pos = jmp_pos; - emit32(ARMV8A::B | ((codePos - jmp_pos) / 4), code, literal_pos); - - for (size_t j = 0; j < progSize; ++j) - { - const Instruction& instr = prog(j); - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; - - switch (static_cast(instr.opcode)) - { - case randomx::SuperscalarInstructionType::ISUB_R: - emit32(ARMV8A::SUB | dst | (dst << 5) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IXOR_R: - emit32(ARMV8A::EOR | dst | (dst << 5) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IADD_RS: - emit32(ARMV8A::ADD | dst | (dst << 5) | (instr.getModShift() << 10) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IMUL_R: - emit32(ARMV8A::MUL | dst | (dst << 5) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IROR_C: - emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IADD_C7: - case randomx::SuperscalarInstructionType::IADD_C8: - case randomx::SuperscalarInstructionType::IADD_C9: - emitAddImmediate(dst, dst, instr.getImm32(), code, codePos); - break; - case randomx::SuperscalarInstructionType::IXOR_C7: - case randomx::SuperscalarInstructionType::IXOR_C8: - case randomx::SuperscalarInstructionType::IXOR_C9: - emitMovImmediate(tmp_reg, instr.getImm32(), code, codePos); - emit32(ARMV8A::EOR | dst | (dst << 5) | 
(tmp_reg << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IMULH_R: - emit32(ARMV8A::UMULH | dst | (dst << 5) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::ISMULH_R: - emit32(ARMV8A::SMULH | dst | (dst << 5) | (src << 16), code, codePos); - break; - case randomx::SuperscalarInstructionType::IMUL_RCP: - { - int32_t offset = (literal_pos - codePos) / 4; - offset &= (1 << 19) - 1; - literal_pos += 8; - - // ldr tmp_reg, reciprocal - emit32(ARMV8A::LDR_LITERAL | tmp_reg | (offset << 5), code, codePos); - - // mul dst, dst, tmp_reg - emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, codePos); - } - break; - default: - break; - } - } - - p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_mix; - p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result; - memcpy(code + codePos, p1, p2 - p1); - codePos += p2 - p1; - - // Update registerValue - emit32(ARMV8A::MOV_REG | 10 | (prog.getAddressRegister() << 16), code, codePos); - } - - p1 = (uint8_t*)randomx_calc_dataset_item_aarch64_store_result; - p2 = (uint8_t*)randomx_calc_dataset_item_aarch64_end; - memcpy(code + codePos, p1, p2 - p1); - codePos += p2 - p1; - -#ifdef __GNUC__ - __builtin___clear_cache(reinterpret_cast(code + CodeSize), reinterpret_cast(code + codePos)); -#endif -} - -template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector &reciprocalCache); - -DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() -{ - return (DatasetInitFunc*)(code + (((uint8_t*)randomx_init_dataset_aarch64) - ((uint8_t*)randomx_program_aarch64))); -} - -size_t JitCompilerA64::getCodeSize() -{ - return CodeSize; -} - -void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos) -{ - uint32_t k = codePos; - - if (imm < (1 << 16)) - { - // movz tmp_reg, imm32 (16 low bits) - emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k); - } - else - { - if (num32bitLiterals 
< 64) - { - if (static_cast(imm) < 0) - { - // smov dst, vN.s[M] - emit32(0x4E042C00 | dst | ((num32bitLiterals / 4) << 5) | ((num32bitLiterals % 4) << 19), code, k); - } - else - { - // umov dst, vN.s[M] - emit32(0x0E043C00 | dst | ((num32bitLiterals / 4) << 5) | ((num32bitLiterals % 4) << 19), code, k); - } - - ((uint32_t*)(code + ImulRcpLiteralsEnd))[num32bitLiterals] = imm; - ++num32bitLiterals; - } - else - { - if (static_cast(imm) < 0) - { - // movn tmp_reg, ~imm32 (16 high bits) - emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k); - } - else - { - // movz tmp_reg, imm32 (16 high bits) - emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k); - } - - // movk tmp_reg, imm32 (16 low bits) - emit32(ARMV8A::MOVK | dst | ((imm & 0xFFFF) << 5), code, k); - } - } - - codePos = k; -} - -void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos) -{ - uint32_t k = codePos; - - if (imm < (1 << 24)) - { - const uint32_t imm_lo = imm & ((1 << 12) - 1); - const uint32_t imm_hi = imm >> 12; - - if (imm_lo && imm_hi) - { - emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k); - emit32(ARMV8A::ADD_IMM_HI | dst | (dst << 5) | (imm_hi << 10), code, k); - } - else if (imm_lo) - { - emit32(ARMV8A::ADD_IMM_LO | dst | (src << 5) | (imm_lo << 10), code, k); - } - else - { - emit32(ARMV8A::ADD_IMM_HI | dst | (src << 5) | (imm_hi << 10), code, k); - } - } - else - { - constexpr uint32_t tmp_reg = 18; - emitMovImmediate(tmp_reg, imm, code, k); - - // add dst, src, tmp_reg - emit32(ARMV8A::ADD | dst | (src << 5) | (tmp_reg << 16), code, k); - } - - codePos = k; -} - -template -void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos) -{ - uint32_t k = codePos; - - uint32_t imm = instr.getImm32(); - - if (src != dst) - { - imm &= instr.getModMem() ? 
(RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); - emitAddImmediate(tmp_reg, src, imm, code, k); - - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); - constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10); - constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10); - - emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k); - - // ldr tmp_reg, [x2, tmp_reg] - emit32(0xf8606840 | tmp_reg | (tmp_reg << 16), code, k); - } - else - { - imm = (imm & ScratchpadL3Mask) >> 3; - emitMovImmediate(tmp_reg, imm, code, k); - - // ldr tmp_reg, [x2, tmp_reg, lsl 3] - emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); - } - - codePos = k; -} - -template -void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos) -{ - uint32_t k = codePos; - - uint32_t imm = instr.getImm32(); - constexpr uint32_t tmp_reg = 18; - - imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); - emitAddImmediate(tmp_reg, src, imm, code, k); - - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); - constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10); - constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10); - - emit32(instr.getModMem() ? 
andInstrL1 : andInstrL2, code, k); - - // add tmp_reg, x2, tmp_reg - emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k); - - // ldpsw tmp_reg, tmp_reg + 1, [tmp_reg] - emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k); - - // ins tmp_reg_fp.d[0], tmp_reg - emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k); - - // ins tmp_reg_fp.d[1], tmp_reg + 1 - emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k); - - // scvtf tmp_reg_fp.2d, tmp_reg_fp.2d - emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k); - - codePos = k; -} - -void JitCompilerA64::h_IADD_RS(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - const uint32_t shift = instr.getModShift(); - - // add dst, src << shift - emit32(ARMV8A::ADD | dst | (dst << 5) | (shift << 10) | (src << 16), code, k); - - if (instr.dst == RegisterNeedsDisplacement) - emitAddImmediate(dst, dst, instr.getImm32(), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // add dst, dst, tmp_reg - emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_ISUB_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - if (src != dst) - { - // sub dst, dst, src - emit32(ARMV8A::SUB | dst | (dst << 5) | (src << 16), code, k); - } - else - { - emitAddImmediate(dst, dst, -instr.getImm32(), code, k); - } - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void 
JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // sub dst, dst, tmp_reg - emit32(ARMV8A::SUB | dst | (dst << 5) | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - if (src == dst) - { - src = 18; - emitMovImmediate(src, instr.getImm32(), code, k); - } - - // mul dst, dst, src - emit32(ARMV8A::MUL | dst | (dst << 5) | (src << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // sub dst, dst, tmp_reg - emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IMULH_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - // umulh dst, dst, src - emit32(ARMV8A::UMULH | dst | (dst << 5) | (src << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // umulh dst, dst, tmp_reg - emit32(ARMV8A::UMULH | dst | (dst << 5) | (tmp_reg << 16), code, k); - - 
reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_ISMULH_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - // smulh dst, dst, src - emit32(ARMV8A::SMULH | dst | (dst << 5) | (src << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // smulh dst, dst, tmp_reg - emit32(ARMV8A::SMULH | dst | (dst << 5) | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) -{ - const uint64_t divisor = instr.getImm32(); - if (isZeroOrPowerOf2(divisor)) - return; - - uint32_t k = codePos; - - constexpr uint32_t tmp_reg = 18; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint64_t N = 1ULL << 63; - const uint64_t q = N / divisor; - const uint64_t r = N % divisor; -#ifdef __GNUC__ - const uint64_t shift = 64 - __builtin_clzll(divisor); -#else - uint64_t shift = 32; - for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1) - --shift; -#endif - - const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t); - - literalPos -= sizeof(uint64_t); - *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); - - if (literal_id < 13) - { - static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; - - // mul dst, dst, literal_reg - emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); - } - else - { - // ldr tmp_reg, reciprocal - const uint32_t offset = (literalPos - k) / 4; - 
emit32(ARMV8A::LDR_LITERAL | tmp_reg | (offset << 5), code, k); - - // mul dst, dst, tmp_reg - emit32(ARMV8A::MUL | dst | (dst << 5) | (tmp_reg << 16), code, k); - } - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_INEG_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t dst = IntRegMap[instr.dst]; - - // sub dst, xzr, dst - emit32(ARMV8A::SUB | dst | (31 << 5) | (dst << 16), code, codePos); - - reg_changed_offset[instr.dst] = codePos; -} - -void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - if (src == dst) - { - src = 18; - emitMovImmediate(src, instr.getImm32(), code, k); - } - - // eor dst, dst, src - emit32(ARMV8A::EOR | dst | (dst << 5) | (src << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - constexpr uint32_t tmp_reg = 18; - emitMemLoad(dst, src, instr, code, k); - - // eor dst, dst, tmp_reg - emit32(ARMV8A::EOR | dst | (dst << 5) | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - if (src != dst) - { - // ror dst, dst, src - emit32(ARMV8A::ROR | dst | (dst << 5) | (src << 16), code, codePos); - } - else - { - // ror dst, dst, imm - emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos); - } - - reg_changed_offset[instr.dst] = codePos; -} - -void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = 
IntRegMap[instr.dst]; - - if (src != dst) - { - constexpr uint32_t tmp_reg = 18; - - // sub tmp_reg, xzr, src - emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); - - // ror dst, dst, tmp_reg - emit32(ARMV8A::ROR | dst | (dst << 5) | (tmp_reg << 16), code, k); - } - else - { - // ror dst, dst, imm - emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k); - } - - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - - if (src == dst) - return; - - uint32_t k = codePos; - - constexpr uint32_t tmp_reg = 18; - emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); - emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); - emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); - - reg_changed_offset[instr.src] = k; - reg_changed_offset[instr.dst] = k; - codePos = k; -} - -void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t dst = instr.dst + 16; - - constexpr uint32_t tmp_reg_fp = 28; - constexpr uint32_t src_index1 = 1 << 14; - constexpr uint32_t dst_index1 = 1 << 20; - - emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k); - emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k); - emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k); - - codePos = k; -} - -void JitCompilerA64::h_FADD_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t src = (instr.src % 4) + 24; - const uint32_t dst = (instr.dst % 4) + 16; - - emit32(ARMV8A::FADD | dst | (dst << 5) | (src << 16), code, codePos); -} - -void JitCompilerA64::h_FADD_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = (instr.dst % 4) + 16; - - constexpr uint32_t tmp_reg_fp = 28; - 
emitMemLoadFP(src, instr, code, k); - - emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); - - codePos = k; -} - -void JitCompilerA64::h_FSUB_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t src = (instr.src % 4) + 24; - const uint32_t dst = (instr.dst % 4) + 16; - - emit32(ARMV8A::FSUB | dst | (dst << 5) | (src << 16), code, codePos); -} - -void JitCompilerA64::h_FSUB_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = (instr.dst % 4) + 16; - - constexpr uint32_t tmp_reg_fp = 28; - emitMemLoadFP(src, instr, code, k); - - emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); - - codePos = k; -} - -void JitCompilerA64::h_FSCAL_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t dst = (instr.dst % 4) + 16; - - emit32(ARMV8A::FEOR | dst | (dst << 5) | (31 << 16), code, codePos); -} - -void JitCompilerA64::h_FMUL_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t src = (instr.src % 4) + 24; - const uint32_t dst = (instr.dst % 4) + 20; - - emit32(ARMV8A::FMUL | dst | (dst << 5) | (src << 16), code, codePos); -} - -void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = (instr.dst % 4) + 20; - - constexpr uint32_t tmp_reg_fp = 28; - emitMemLoadFP(src, instr, code, k); - - // and tmp_reg_fp, tmp_reg_fp, and_mask_reg - emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k); - - // orr tmp_reg_fp, tmp_reg_fp, or_mask_reg - emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k); - - emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); - - codePos = k; -} - -void JitCompilerA64::h_FSQRT_R(Instruction& instr, uint32_t& codePos) -{ - const uint32_t dst = (instr.dst % 4) + 20; - - emit32(ARMV8A::FSQRT | dst | (dst << 5), code, codePos); -} - -void 
JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t dst = IntRegMap[instr.dst]; - const uint32_t modCond = instr.getModCond(); - const uint32_t shift = modCond + ConditionOffset; - const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1)); - - emitAddImmediate(dst, dst, imm, code, k); - - // tst dst, mask - static_assert((ConditionMask == 0xFF) && (ConditionOffset == 8), "Update tst encoding for different mask and offset"); - emit32((0xF2781C1F - (modCond << 16)) | (dst << 5), code, k); - - int32_t offset = reg_changed_offset[instr.dst]; - offset = ((offset - k) >> 2) & ((1 << 19) - 1); - - // beq target - emit32(0x54000000 | (offset << 5), code, k); - - for (uint32_t i = 0; i < RegistersCount; ++i) - reg_changed_offset[i] = k; - - codePos = k; -} - -void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - - constexpr uint32_t tmp_reg = 18; - constexpr uint32_t fpcr_tmp_reg = 8; - - // ror tmp_reg, src, imm - emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k); - - // bfi fpcr_tmp_reg, tmp_reg, 40, 2 - emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k); - - // rbit tmp_reg, fpcr_tmp_reg - emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k); - - // msr fpcr, tmp_reg - emit32(0xD51B4400 | tmp_reg, code, k); - - codePos = k; -} - -void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos) -{ - uint32_t k = codePos; - - const uint32_t src = IntRegMap[instr.src]; - const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 18; - - uint32_t imm = instr.getImm32(); - - if (instr.getModCond() < StoreL3Condition) - imm &= instr.getModMem() ? 
(RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); - else - imm &= RANDOMX_SCRATCHPAD_L3 - 1; - - emitAddImmediate(tmp_reg, dst, imm, code, k); - - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); - constexpr uint32_t andInstrL1 = t | ((Log2(RANDOMX_SCRATCHPAD_L1) - 4) << 10); - constexpr uint32_t andInstrL2 = t | ((Log2(RANDOMX_SCRATCHPAD_L2) - 4) << 10); - constexpr uint32_t andInstrL3 = t | ((Log2(RANDOMX_SCRATCHPAD_L3) - 4) << 10); - - emit32((instr.getModCond() < StoreL3Condition) ? (instr.getModMem() ? andInstrL1 : andInstrL2) : andInstrL3, code, k); - - // str src, [x2, tmp_reg] - emit32(0xF8206840 | src | (tmp_reg << 16), code, k); - - codePos = k; -} - -void JitCompilerA64::h_NOP(Instruction& instr, uint32_t& codePos) -{ -} - -#include "instruction_weights.hpp" -#define INST_HANDLE(x) REPN(&JitCompilerA64::h_##x, WT(x)) - - InstructionGeneratorA64 JitCompilerA64::engine[256] = { - INST_HANDLE(IADD_RS) - INST_HANDLE(IADD_M) - INST_HANDLE(ISUB_R) - INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_R) - INST_HANDLE(IMUL_M) - INST_HANDLE(IMULH_R) - INST_HANDLE(IMULH_M) - INST_HANDLE(ISMULH_R) - INST_HANDLE(ISMULH_M) - INST_HANDLE(IMUL_RCP) - INST_HANDLE(INEG_R) - INST_HANDLE(IXOR_R) - INST_HANDLE(IXOR_M) - INST_HANDLE(IROR_R) - INST_HANDLE(IROL_R) - INST_HANDLE(ISWAP_R) - INST_HANDLE(FSWAP_R) - INST_HANDLE(FADD_R) - INST_HANDLE(FADD_M) - INST_HANDLE(FSUB_R) - INST_HANDLE(FSUB_M) - INST_HANDLE(FSCAL_R) - INST_HANDLE(FMUL_R) - INST_HANDLE(FDIV_M) - INST_HANDLE(FSQRT_R) - INST_HANDLE(CBRANCH) - INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(NOP) - }; -} diff --git a/external/src/randomx/src/jit_compiler_a64.hpp b/external/src/randomx/src/jit_compiler_a64.hpp deleted file mode 100644 index a02824f..0000000 --- a/external/src/randomx/src/jit_compiler_a64.hpp +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador -Copyright (c) 2019, SChernykh - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include "common.hpp" -#include "jit_compiler_a64_static.hpp" - -namespace randomx { - - class Program; - struct ProgramConfiguration; - class SuperscalarProgram; - class Instruction; - - typedef void(JitCompilerA64::*InstructionGeneratorA64)(Instruction&, uint32_t&); - - class JitCompilerA64 { - public: - JitCompilerA64(); - ~JitCompilerA64(); - - void generateProgram(Program&, ProgramConfiguration&); - void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); - - template - void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &); - - void generateDatasetInitCode() {} - - ProgramFunc* getProgramFunc() { return reinterpret_cast(code); } - DatasetInitFunc* getDatasetInitFunc(); - uint8_t* getCode() { return code; } - size_t getCodeSize(); - - void enableWriting(); - void enableExecution(); - void enableAll(); - - private: - static InstructionGeneratorA64 engine[256]; - uint32_t reg_changed_offset[8]; - uint8_t* code; - uint32_t literalPos; - uint32_t num32bitLiterals; - - static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos) - { - *(uint32_t*)(code + codePos) = val; - codePos += sizeof(val); - } - - static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos) - { - *(uint64_t*)(code + codePos) = val; - codePos += sizeof(val); - } - - void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos); - void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos); - - template - void emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos); - - template - void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos); - - void h_IADD_RS(Instruction&, uint32_t&); - void h_IADD_M(Instruction&, uint32_t&); - void h_ISUB_R(Instruction&, uint32_t&); - void h_ISUB_M(Instruction&, uint32_t&); - void h_IMUL_R(Instruction&, uint32_t&); - void 
h_IMUL_M(Instruction&, uint32_t&); - void h_IMULH_R(Instruction&, uint32_t&); - void h_IMULH_M(Instruction&, uint32_t&); - void h_ISMULH_R(Instruction&, uint32_t&); - void h_ISMULH_M(Instruction&, uint32_t&); - void h_IMUL_RCP(Instruction&, uint32_t&); - void h_INEG_R(Instruction&, uint32_t&); - void h_IXOR_R(Instruction&, uint32_t&); - void h_IXOR_M(Instruction&, uint32_t&); - void h_IROR_R(Instruction&, uint32_t&); - void h_IROL_R(Instruction&, uint32_t&); - void h_ISWAP_R(Instruction&, uint32_t&); - void h_FSWAP_R(Instruction&, uint32_t&); - void h_FADD_R(Instruction&, uint32_t&); - void h_FADD_M(Instruction&, uint32_t&); - void h_FSUB_R(Instruction&, uint32_t&); - void h_FSUB_M(Instruction&, uint32_t&); - void h_FSCAL_R(Instruction&, uint32_t&); - void h_FMUL_R(Instruction&, uint32_t&); - void h_FDIV_M(Instruction&, uint32_t&); - void h_FSQRT_R(Instruction&, uint32_t&); - void h_CBRANCH(Instruction&, uint32_t&); - void h_CFROUND(Instruction&, uint32_t&); - void h_ISTORE(Instruction&, uint32_t&); - void h_NOP(Instruction&, uint32_t&); - }; -} diff --git a/external/src/randomx/src/jit_compiler_a64_static.S b/external/src/randomx/src/jit_compiler_a64_static.S deleted file mode 100644 index 7fe6599..0000000 --- a/external/src/randomx/src/jit_compiler_a64_static.S +++ /dev/null @@ -1,588 +0,0 @@ -# Copyright (c) 2018-2019, tevador -# Copyright (c) 2019, SChernykh -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of the copyright holder nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#if defined(__APPLE__) -#define DECL(x) _##x -#else -#define DECL(x) x -#endif - - .arch armv8-a - .text - .global DECL(randomx_program_aarch64) - .global DECL(randomx_program_aarch64_main_loop) - .global DECL(randomx_program_aarch64_vm_instructions) - .global DECL(randomx_program_aarch64_imul_rcp_literals_end) - .global DECL(randomx_program_aarch64_vm_instructions_end) - .global DECL(randomx_program_aarch64_cacheline_align_mask1) - .global DECL(randomx_program_aarch64_cacheline_align_mask2) - .global DECL(randomx_program_aarch64_update_spMix1) - .global DECL(randomx_program_aarch64_vm_instructions_end_light) - .global DECL(randomx_program_aarch64_light_cacheline_align_mask) - .global DECL(randomx_program_aarch64_light_dataset_offset) - .global DECL(randomx_init_dataset_aarch64) - .global DECL(randomx_init_dataset_aarch64_end) - .global DECL(randomx_calc_dataset_item_aarch64) - .global DECL(randomx_calc_dataset_item_aarch64_prefetch) - .global DECL(randomx_calc_dataset_item_aarch64_mix) - 
.global DECL(randomx_calc_dataset_item_aarch64_store_result) - .global DECL(randomx_calc_dataset_item_aarch64_end) - -#include "configuration.h" - -# Register allocation - -# x0 -> pointer to reg buffer and then literal for IMUL_RCP -# x1 -> pointer to mem buffer and then to dataset -# x2 -> pointer to scratchpad -# x3 -> loop counter -# x4 -> "r0" -# x5 -> "r1" -# x6 -> "r2" -# x7 -> "r3" -# x8 -> fpcr (reversed bits) -# x9 -> mx, ma -# x10 -> spMix1 -# x11 -> literal for IMUL_RCP -# x12 -> "r4" -# x13 -> "r5" -# x14 -> "r6" -# x15 -> "r7" -# x16 -> spAddr0 -# x17 -> spAddr1 -# x18 -> temporary -# x19 -> temporary -# x20 -> literal for IMUL_RCP -# x21 -> literal for IMUL_RCP -# x22 -> literal for IMUL_RCP -# x23 -> literal for IMUL_RCP -# x24 -> literal for IMUL_RCP -# x25 -> literal for IMUL_RCP -# x26 -> literal for IMUL_RCP -# x27 -> literal for IMUL_RCP -# x28 -> literal for IMUL_RCP -# x29 -> literal for IMUL_RCP -# x30 -> literal for IMUL_RCP - -# v0-v15 -> store 32-bit literals -# v16 -> "f0" -# v17 -> "f1" -# v18 -> "f2" -# v19 -> "f3" -# v20 -> "e0" -# v21 -> "e1" -# v22 -> "e2" -# v23 -> "e3" -# v24 -> "a0" -# v25 -> "a1" -# v26 -> "a2" -# v27 -> "a3" -# v28 -> temporary -# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff -# v30 -> E 'or' mask = 0x3*00000000******3*00000000****** -# v31 -> scale mask = 0x81f000000000000081f0000000000000 - - .balign 4 -DECL(randomx_program_aarch64): - # Save callee-saved registers - sub sp, sp, 192 - stp x16, x17, [sp] - stp x18, x19, [sp, 16] - stp x20, x21, [sp, 32] - stp x22, x23, [sp, 48] - stp x24, x25, [sp, 64] - stp x26, x27, [sp, 80] - stp x28, x29, [sp, 96] - stp x8, x30, [sp, 112] - stp d8, d9, [sp, 128] - stp d10, d11, [sp, 144] - stp d12, d13, [sp, 160] - stp d14, d15, [sp, 176] - - # Zero integer registers - mov x4, xzr - mov x5, xzr - mov x6, xzr - mov x7, xzr - mov x12, xzr - mov x13, xzr - mov x14, xzr - mov x15, xzr - - # Load ma, mx and dataset pointer - ldp x9, x1, [x1] - - # Load initial spMix 
value - mov x10, x9 - - # Load group A registers - ldp q24, q25, [x0, 192] - ldp q26, q27, [x0, 224] - - # Load E 'and' mask - mov x16, 0x00FFFFFFFFFFFFFF - ins v29.d[0], x16 - ins v29.d[1], x16 - - # Load E 'or' mask (stored in reg.f[0]) - ldr q30, [x0, 64] - - # Load scale mask - mov x16, 0x80f0000000000000 - ins v31.d[0], x16 - ins v31.d[1], x16 - - # Read fpcr - mrs x8, fpcr - rbit x8, x8 - - # Save x0 - str x0, [sp, -16]! - - # Read literals - ldr x0, literal_x0 - ldr x11, literal_x11 - ldr x20, literal_x20 - ldr x21, literal_x21 - ldr x22, literal_x22 - ldr x23, literal_x23 - ldr x24, literal_x24 - ldr x25, literal_x25 - ldr x26, literal_x26 - ldr x27, literal_x27 - ldr x28, literal_x28 - ldr x29, literal_x29 - ldr x30, literal_x30 - - ldr q0, literal_v0 - ldr q1, literal_v1 - ldr q2, literal_v2 - ldr q3, literal_v3 - ldr q4, literal_v4 - ldr q5, literal_v5 - ldr q6, literal_v6 - ldr q7, literal_v7 - ldr q8, literal_v8 - ldr q9, literal_v9 - ldr q10, literal_v10 - ldr q11, literal_v11 - ldr q12, literal_v12 - ldr q13, literal_v13 - ldr q14, literal_v14 - ldr q15, literal_v15 - -DECL(randomx_program_aarch64_main_loop): - # spAddr0 = spMix1 & ScratchpadL3Mask64; - # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; - lsr x18, x10, 32 - - # Actual mask will be inserted by JIT compiler - and w16, w10, 1 - and w17, w18, 1 - - # x16 = scratchpad + spAddr0 - # x17 = scratchpad + spAddr1 - add x16, x16, x2 - add x17, x17, x2 - - # xor integer registers with scratchpad data (spAddr0) - ldp x18, x19, [x16] - eor x4, x4, x18 - eor x5, x5, x19 - ldp x18, x19, [x16, 16] - eor x6, x6, x18 - eor x7, x7, x19 - ldp x18, x19, [x16, 32] - eor x12, x12, x18 - eor x13, x13, x19 - ldp x18, x19, [x16, 48] - eor x14, x14, x18 - eor x15, x15, x19 - - # Load group F registers (spAddr1) - ldpsw x18, x19, [x17] - ins v16.d[0], x18 - ins v16.d[1], x19 - ldpsw x18, x19, [x17, 8] - ins v17.d[0], x18 - ins v17.d[1], x19 - ldpsw x18, x19, [x17, 16] - ins v18.d[0], x18 - ins v18.d[1], x19 - 
ldpsw x18, x19, [x17, 24] - ins v19.d[0], x18 - ins v19.d[1], x19 - scvtf v16.2d, v16.2d - scvtf v17.2d, v17.2d - scvtf v18.2d, v18.2d - scvtf v19.2d, v19.2d - - # Load group E registers (spAddr1) - ldpsw x18, x19, [x17, 32] - ins v20.d[0], x18 - ins v20.d[1], x19 - ldpsw x18, x19, [x17, 40] - ins v21.d[0], x18 - ins v21.d[1], x19 - ldpsw x18, x19, [x17, 48] - ins v22.d[0], x18 - ins v22.d[1], x19 - ldpsw x18, x19, [x17, 56] - ins v23.d[0], x18 - ins v23.d[1], x19 - scvtf v20.2d, v20.2d - scvtf v21.2d, v21.2d - scvtf v22.2d, v22.2d - scvtf v23.2d, v23.2d - and v20.16b, v20.16b, v29.16b - and v21.16b, v21.16b, v29.16b - and v22.16b, v22.16b, v29.16b - and v23.16b, v23.16b, v29.16b - orr v20.16b, v20.16b, v30.16b - orr v21.16b, v21.16b, v30.16b - orr v22.16b, v22.16b, v30.16b - orr v23.16b, v23.16b, v30.16b - - # Execute VM instructions -DECL(randomx_program_aarch64_vm_instructions): - - # buffer for generated instructions - # FDIV_M is the largest instruction taking up to 12 ARMv8 instructions - .fill RANDOMX_PROGRAM_SIZE*12,4,0 - -literal_x0: .fill 1,8,0 -literal_x11: .fill 1,8,0 -literal_x20: .fill 1,8,0 -literal_x21: .fill 1,8,0 -literal_x22: .fill 1,8,0 -literal_x23: .fill 1,8,0 -literal_x24: .fill 1,8,0 -literal_x25: .fill 1,8,0 -literal_x26: .fill 1,8,0 -literal_x27: .fill 1,8,0 -literal_x28: .fill 1,8,0 -literal_x29: .fill 1,8,0 -literal_x30: .fill 1,8,0 -DECL(randomx_program_aarch64_imul_rcp_literals_end): - -literal_v0: .fill 2,8,0 -literal_v1: .fill 2,8,0 -literal_v2: .fill 2,8,0 -literal_v3: .fill 2,8,0 -literal_v4: .fill 2,8,0 -literal_v5: .fill 2,8,0 -literal_v6: .fill 2,8,0 -literal_v7: .fill 2,8,0 -literal_v8: .fill 2,8,0 -literal_v9: .fill 2,8,0 -literal_v10: .fill 2,8,0 -literal_v11: .fill 2,8,0 -literal_v12: .fill 2,8,0 -literal_v13: .fill 2,8,0 -literal_v14: .fill 2,8,0 -literal_v15: .fill 2,8,0 - -DECL(randomx_program_aarch64_vm_instructions_end): - # Calculate dataset pointer for dataset read - # Do it here to break false dependency from 
readReg2 and readReg3 (see next line) - lsr x10, x9, 32 - - # mx ^= r[readReg2] ^ r[readReg3]; - eor x9, x9, x18 - - # Calculate dataset pointer for dataset prefetch - mov w18, w9 -DECL(randomx_program_aarch64_cacheline_align_mask1): - # Actual mask will be inserted by JIT compiler - and x18, x18, 1 - add x18, x18, x1 - - # Prefetch dataset data - prfm pldl2strm, [x18] - - # mx <-> ma - ror x9, x9, 32 - -DECL(randomx_program_aarch64_cacheline_align_mask2): - # Actual mask will be inserted by JIT compiler - and x10, x10, 1 - add x10, x10, x1 - -DECL(randomx_program_aarch64_xor_with_dataset_line): - # xor integer registers with dataset data - ldp x18, x19, [x10] - eor x4, x4, x18 - eor x5, x5, x19 - ldp x18, x19, [x10, 16] - eor x6, x6, x18 - eor x7, x7, x19 - ldp x18, x19, [x10, 32] - eor x12, x12, x18 - eor x13, x13, x19 - ldp x18, x19, [x10, 48] - eor x14, x14, x18 - eor x15, x15, x19 - -DECL(randomx_program_aarch64_update_spMix1): - # JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1" - eor x10, x0, x0 - - # Store integer registers to scratchpad (spAddr1) - stp x4, x5, [x17, 0] - stp x6, x7, [x17, 16] - stp x12, x13, [x17, 32] - stp x14, x15, [x17, 48] - - # xor group F and group E registers - eor v16.16b, v16.16b, v20.16b - eor v17.16b, v17.16b, v21.16b - eor v18.16b, v18.16b, v22.16b - eor v19.16b, v19.16b, v23.16b - - # Store FP registers to scratchpad (spAddr0) - stp q16, q17, [x16, 0] - stp q18, q19, [x16, 32] - - subs x3, x3, 1 - bne DECL(randomx_program_aarch64_main_loop) - - # Restore x0 - ldr x0, [sp], 16 - - # Store integer registers - stp x4, x5, [x0, 0] - stp x6, x7, [x0, 16] - stp x12, x13, [x0, 32] - stp x14, x15, [x0, 48] - - # Store FP registers - stp q16, q17, [x0, 64] - stp q18, q19, [x0, 96] - stp q20, q21, [x0, 128] - stp q22, q23, [x0, 160] - - # Restore callee-saved registers - ldp x16, x17, [sp] - ldp x18, x19, [sp, 16] - ldp x20, x21, [sp, 32] - ldp x22, x23, [sp, 48] - ldp x24, x25, [sp, 64] - ldp x26, x27, [sp, 
80] - ldp x28, x29, [sp, 96] - ldp x8, x30, [sp, 112] - ldp d8, d9, [sp, 128] - ldp d10, d11, [sp, 144] - ldp d12, d13, [sp, 160] - ldp d14, d15, [sp, 176] - add sp, sp, 192 - - ret - -DECL(randomx_program_aarch64_vm_instructions_end_light): - sub sp, sp, 96 - stp x0, x1, [sp, 64] - stp x2, x30, [sp, 80] - - # mx ^= r[readReg2] ^ r[readReg3]; - eor x9, x9, x18 - - # mx <-> ma - ror x9, x9, 32 - - # x0 -> pointer to cache memory - mov x0, x1 - - # x1 -> pointer to output - mov x1, sp - -DECL(randomx_program_aarch64_light_cacheline_align_mask): - # Actual mask will be inserted by JIT compiler - and w2, w9, 1 - - # x2 -> item number - lsr x2, x2, 6 - -DECL(randomx_program_aarch64_light_dataset_offset): - # Apply dataset offset (filled in by JIT compiler) - add x2, x2, 0 - add x2, x2, 0 - - bl DECL(randomx_calc_dataset_item_aarch64) - - mov x10, sp - ldp x0, x1, [sp, 64] - ldp x2, x30, [sp, 80] - add sp, sp, 96 - - b DECL(randomx_program_aarch64_xor_with_dataset_line) - - - -# Input parameters -# -# x0 -> pointer to cache -# x1 -> pointer to dataset memory at startItem -# x2 -> start item -# x3 -> end item - -DECL(randomx_init_dataset_aarch64): - # Save x30 (return address) - str x30, [sp, -16]! 
- - # Load pointer to cache memory - ldr x0, [x0] - -DECL(randomx_init_dataset_aarch64_main_loop): - bl DECL(randomx_calc_dataset_item_aarch64) - add x1, x1, 64 - add x2, x2, 1 - cmp x2, x3 - bne DECL(randomx_init_dataset_aarch64_main_loop) - - # Restore x30 (return address) - ldr x30, [sp], 16 - - ret - -DECL(randomx_init_dataset_aarch64_end): - -# Input parameters -# -# x0 -> pointer to cache memory -# x1 -> pointer to output -# x2 -> item number -# -# Register allocation -# -# x0-x7 -> output value (calculated dataset item) -# x8 -> pointer to cache memory -# x9 -> pointer to output -# x10 -> registerValue -# x11 -> mixBlock -# x12 -> temporary -# x13 -> temporary - -DECL(randomx_calc_dataset_item_aarch64): - sub sp, sp, 112 - stp x0, x1, [sp] - stp x2, x3, [sp, 16] - stp x4, x5, [sp, 32] - stp x6, x7, [sp, 48] - stp x8, x9, [sp, 64] - stp x10, x11, [sp, 80] - stp x12, x13, [sp, 96] - - ldr x12, superscalarMul0 - - mov x8, x0 - mov x9, x1 - mov x10, x2 - - # rl[0] = (itemNumber + 1) * superscalarMul0; - madd x0, x2, x12, x12 - - # rl[1] = rl[0] ^ superscalarAdd1; - ldr x12, superscalarAdd1 - eor x1, x0, x12 - - # rl[2] = rl[0] ^ superscalarAdd2; - ldr x12, superscalarAdd2 - eor x2, x0, x12 - - # rl[3] = rl[0] ^ superscalarAdd3; - ldr x12, superscalarAdd3 - eor x3, x0, x12 - - # rl[4] = rl[0] ^ superscalarAdd4; - ldr x12, superscalarAdd4 - eor x4, x0, x12 - - # rl[5] = rl[0] ^ superscalarAdd5; - ldr x12, superscalarAdd5 - eor x5, x0, x12 - - # rl[6] = rl[0] ^ superscalarAdd6; - ldr x12, superscalarAdd6 - eor x6, x0, x12 - - # rl[7] = rl[0] ^ superscalarAdd7; - ldr x12, superscalarAdd7 - eor x7, x0, x12 - - b DECL(randomx_calc_dataset_item_aarch64_prefetch) - -superscalarMul0: .quad 6364136223846793005 -superscalarAdd1: .quad 9298411001130361340 -superscalarAdd2: .quad 12065312585734608966 -superscalarAdd3: .quad 9306329213124626780 -superscalarAdd4: .quad 5281919268842080866 -superscalarAdd5: .quad 10536153434571861004 -superscalarAdd6: .quad 3398623926847679864 
-superscalarAdd7: .quad 9549104520008361294 - -# Prefetch -> SuperScalar hash -> Mix will be repeated N times - -DECL(randomx_calc_dataset_item_aarch64_prefetch): - # Actual mask will be inserted by JIT compiler - and x11, x10, 1 - add x11, x8, x11, lsl 6 - prfm pldl2strm, [x11] - - # Generated SuperScalar hash program goes here - -DECL(randomx_calc_dataset_item_aarch64_mix): - ldp x12, x13, [x11] - eor x0, x0, x12 - eor x1, x1, x13 - ldp x12, x13, [x11, 16] - eor x2, x2, x12 - eor x3, x3, x13 - ldp x12, x13, [x11, 32] - eor x4, x4, x12 - eor x5, x5, x13 - ldp x12, x13, [x11, 48] - eor x6, x6, x12 - eor x7, x7, x13 - -DECL(randomx_calc_dataset_item_aarch64_store_result): - stp x0, x1, [x9] - stp x2, x3, [x9, 16] - stp x4, x5, [x9, 32] - stp x6, x7, [x9, 48] - - ldp x0, x1, [sp] - ldp x2, x3, [sp, 16] - ldp x4, x5, [sp, 32] - ldp x6, x7, [sp, 48] - ldp x8, x9, [sp, 64] - ldp x10, x11, [sp, 80] - ldp x12, x13, [sp, 96] - add sp, sp, 112 - - ret - -DECL(randomx_calc_dataset_item_aarch64_end): diff --git a/external/src/randomx/src/jit_compiler_a64_static.hpp b/external/src/randomx/src/jit_compiler_a64_static.hpp deleted file mode 100644 index a9b922e..0000000 --- a/external/src/randomx/src/jit_compiler_a64_static.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador -Copyright (c) 2019, SChernykh - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -extern "C" { - void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations); - void randomx_program_aarch64_main_loop(); - void randomx_program_aarch64_vm_instructions(); - void randomx_program_aarch64_imul_rcp_literals_end(); - void randomx_program_aarch64_vm_instructions_end(); - void randomx_program_aarch64_cacheline_align_mask1(); - void randomx_program_aarch64_cacheline_align_mask2(); - void randomx_program_aarch64_update_spMix1(); - void randomx_program_aarch64_vm_instructions_end_light(); - void randomx_program_aarch64_light_cacheline_align_mask(); - void randomx_program_aarch64_light_dataset_offset(); - void randomx_init_dataset_aarch64(); - void randomx_init_dataset_aarch64_end(); - void randomx_calc_dataset_item_aarch64(); - void randomx_calc_dataset_item_aarch64_prefetch(); - void randomx_calc_dataset_item_aarch64_mix(); - void randomx_calc_dataset_item_aarch64_store_result(); - void randomx_calc_dataset_item_aarch64_end(); -} diff --git 
a/external/src/randomx/src/jit_compiler_fallback.hpp b/external/src/randomx/src/jit_compiler_fallback.hpp deleted file mode 100644 index 57a6dbf..0000000 --- a/external/src/randomx/src/jit_compiler_fallback.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include "common.hpp" - -namespace randomx { - - class Program; - struct ProgramConfiguration; - class SuperscalarProgram; - - class JitCompilerFallback { - public: - JitCompilerFallback() { - throw std::runtime_error("JIT compilation is not supported on this platform"); - } - void generateProgram(Program&, ProgramConfiguration&) { - - } - void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) { - - } - template - void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &) { - - } - void generateDatasetInitCode() { - - } - ProgramFunc* getProgramFunc() { - return nullptr; - } - DatasetInitFunc* getDatasetInitFunc() { - return nullptr; - } - uint8_t* getCode() { - return nullptr; - } - size_t getCodeSize() { - return 0; - } - void enableWriting() {} - void enableExecution() {} - void enableAll() {} - }; -} \ No newline at end of file diff --git a/external/src/randomx/src/jit_compiler_x86.cpp b/external/src/randomx/src/jit_compiler_x86.cpp deleted file mode 100644 index e75f763..0000000 --- a/external/src/randomx/src/jit_compiler_x86.cpp +++ /dev/null @@ -1,845 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include -#include -#include "jit_compiler_x86.hpp" -#include "jit_compiler_x86_static.hpp" -#include "superscalar.hpp" -#include "program.hpp" -#include "reciprocal.h" -#include "virtual_memory.hpp" - -namespace randomx { - /* - - REGISTER ALLOCATION: - - ; rax -> temporary - ; rbx -> iteration counter "ic" - ; rcx -> temporary - ; rdx -> temporary - ; rsi -> scratchpad pointer - ; rdi -> dataset pointer - ; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits) - ; rsp -> stack pointer - ; r8 -> "r0" - ; r9 -> "r1" - ; r10 -> "r2" - ; r11 -> "r3" - ; r12 -> "r4" - ; r13 -> "r5" - ; r14 -> "r6" - ; r15 -> "r7" - ; xmm0 -> "f0" - ; xmm1 -> "f1" - ; xmm2 -> "f2" - ; xmm3 -> "f3" - ; xmm4 -> "e0" - ; xmm5 -> "e1" - ; xmm6 -> "e2" - ; xmm7 -> "e3" - ; xmm8 -> "a0" - ; xmm9 -> "a1" - ; xmm10 -> "a2" - ; xmm11 -> "a3" - ; xmm12 -> temporary - ; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff - ; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000****** - ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000 - - */ - - //Calculate the required code buffer size that is sufficient for the largest possible program: - - constexpr size_t MaxRandomXInstrCodeSize = 32; //FDIV_M 
requires up to 32 bytes of x86 code - constexpr size_t MaxSuperscalarInstrSize = 14; //IMUL_RCP requires 14 bytes of x86 code - constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program - constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB - constexpr size_t ReserveCodeSize = CodeAlign; //function prologue/epilogue + reserve - - constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign); - constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign); - - static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large"); - static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large"); - - constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize; - - constexpr int32_t superScalarHashOffset = RandomXCodeSize; - -#if defined(_MSC_VER) && (defined(_DEBUG) || defined (RELWITHDEBINFO)) -#define ADDR(x) ((((uint8_t*)&x)[0] == 0xE9) ? 
(((uint8_t*)&x) + *(const int32_t*)(((uint8_t*)&x) + 1) + 5) : ((uint8_t*)&x)) -#else -#define ADDR(x) ((uint8_t*)&x) -#endif - - const uint8_t* codePrologue = ADDR(randomx_program_prologue); - const uint8_t* codeLoopBegin = ADDR(randomx_program_loop_begin); - const uint8_t* codeLoopLoad = ADDR(randomx_program_loop_load); - const uint8_t* codeProgamStart = ADDR(randomx_program_start); - const uint8_t* codeReadDataset = ADDR(randomx_program_read_dataset); - const uint8_t* codeReadDatasetLightSshInit = ADDR(randomx_program_read_dataset_sshash_init); - const uint8_t* codeReadDatasetLightSshFin = ADDR(randomx_program_read_dataset_sshash_fin); - const uint8_t* codeDatasetInit = ADDR(randomx_dataset_init); - const uint8_t* codeLoopStore = ADDR(randomx_program_loop_store); - const uint8_t* codeLoopEnd = ADDR(randomx_program_loop_end); - const uint8_t* codeEpilogue = ADDR(randomx_program_epilogue); - const uint8_t* codeProgramEnd = ADDR(randomx_program_end); - const uint8_t* codeShhLoad = ADDR(randomx_sshash_load); - const uint8_t* codeShhPrefetch = ADDR(randomx_sshash_prefetch); - const uint8_t* codeShhEnd = ADDR(randomx_sshash_end); - const uint8_t* codeShhInit = ADDR(randomx_sshash_init); - - const int32_t prologueSize = codeLoopBegin - codePrologue; - const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; - const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset; - const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; - const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; - const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; - const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; - const int32_t epilogueSize = codeShhLoad - codeEpilogue; - const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad; - const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch; - const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; - - const int32_t 
epilogueOffset = CodeSize - epilogueSize; - - static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; - static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 }; - static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b }; - static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b }; - static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b }; - static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b }; - static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b }; - static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf }; - static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 }; - static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf }; - static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 }; - static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 }; - static const uint8_t REX_81[] = { 0x49, 0x81 }; - static const uint8_t AND_EAX_I = 0x25; - static const uint8_t MOV_EAX_I = 0xb8; - static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 }; - static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 }; - static const uint8_t REX_LEA[] = { 0x4f, 0x8d }; - static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e }; - static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e }; - static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 }; - static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 }; - static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 }; - static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea }; - static const uint8_t REX_SH[] = { 0x49, 0xc1 }; - static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f }; - static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 }; - static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 }; - static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 }; - static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 }; - static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 }; - static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 }; - static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 }; - static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 }; - 
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA }; - static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 }; - static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 }; - static const uint8_t REX_NEG[] = { 0x49, 0xF7 }; - static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; - static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; - static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 }; - static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 }; - static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 }; - static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 }; - static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 }; - static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 }; - static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c }; - static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 }; - static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 }; - static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f }; - static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e }; - static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; - static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 }; - static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; - static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; - static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; - static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 }; - static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 }; - static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 }; - static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 }; - static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 }; - static const uint8_t JNZ[] = { 0x0f, 0x85 }; - static const uint8_t JMP = 0xe9; - static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; - static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; - static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; - 
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; - static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; - static const uint8_t CALL = 0xe8; - static const uint8_t REX_ADD_I[] = { 0x49, 0x81 }; - static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; - static const uint8_t JZ[] = { 0x0f, 0x84 }; - static const uint8_t RET = 0xc3; - static const uint8_t LEA_32[] = { 0x41, 0x8d }; - static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; - static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; - - static const uint8_t NOP1[] = { 0x90 }; - static const uint8_t NOP2[] = { 0x66, 0x90 }; - static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 }; - static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 }; - static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; - static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; - static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; - static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; - - static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; - - size_t JitCompilerX86::getCodeSize() { - return CodeSize; - } - - JitCompilerX86::JitCompilerX86() { - code = (uint8_t*)allocMemoryPages(CodeSize); - memcpy(code, codePrologue, prologueSize); - memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); - } - - JitCompilerX86::~JitCompilerX86() { - freePagedMemory(code, CodeSize); - } - - void JitCompilerX86::enableAll() { - setPagesRWX(code, CodeSize); - } - - void JitCompilerX86::enableWriting() { - setPagesRW(code, CodeSize); - } - - void JitCompilerX86::enableExecution() { - setPagesRX(code, CodeSize); - } - - void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { - generateProgramPrologue(prog, pcfg); - memcpy(code + codePos, codeReadDataset, readDatasetSize); - codePos += readDatasetSize; - generateProgramEpilogue(prog, pcfg); - } - - void JitCompilerX86::generateProgramLight(Program& prog, 
ProgramConfiguration& pcfg, uint32_t datasetOffset) { - generateProgramPrologue(prog, pcfg); - emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); - emit(ADD_EBX_I); - emit32(datasetOffset / CacheLineSize); - emitByte(CALL); - emit32(superScalarHashOffset - (codePos + 4)); - emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); - generateProgramEpilogue(prog, pcfg); - } - - template - void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector &reciprocalCache) { - memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); - codePos = superScalarHashOffset + codeSshInitSize; - for (unsigned j = 0; j < N; ++j) { - SuperscalarProgram& prog = programs[j]; - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - generateSuperscalarCode(instr, reciprocalCache); - } - emit(codeShhLoad, codeSshLoadSize); - if (j < N - 1) { - emit(REX_MOV_RR64); - emitByte(0xd8 + prog.getAddressRegister()); - emit(codeShhPrefetch, codeSshPrefetchSize); -#ifdef RANDOMX_ALIGN - int align = (codePos % 16); - while (align != 0) { - int nopSize = 16 - align; - if (nopSize > 8) nopSize = 8; - emit(NOPX[nopSize - 1], nopSize); - align = (codePos % 16); - } -#endif - } - } - emitByte(RET); - } - - template - void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector &reciprocalCache); - - void JitCompilerX86::generateDatasetInitCode() { - memcpy(code, codeDatasetInit, datasetInitSize); - } - - void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { - instructionOffsets.clear(); - for (unsigned i = 0; i < RegistersCount; ++i) { - registerUsage[i] = -1; - } - - codePos = prologueSize; - memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); - memcpy(code + codePos, codeLoopLoad, loopLoadSize); - codePos += loopLoadSize; - for (unsigned i = 0; i < prog.getSize(); ++i) { - Instruction& instr = prog(i); - instr.src %= 
RegistersCount; - instr.dst %= RegistersCount; - generateCode(instr, i); - } - emit(REX_MOV_RR); - emitByte(0xc0 + pcfg.readReg2); - emit(REX_XOR_EAX); - emitByte(0xc0 + pcfg.readReg3); - } - - void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) { - emit(REX_MOV_RR64); - emitByte(0xc0 + pcfg.readReg0); - emit(REX_XOR_RAX_R64); - emitByte(0xc0 + pcfg.readReg1); - emit(ADDR(randomx_prefetch_scratchpad), ADDR(randomx_prefetch_scratchpad_end) - ADDR(randomx_prefetch_scratchpad)); - memcpy(code + codePos, codeLoopStore, loopStoreSize); - codePos += loopStoreSize; - emit(SUB_EBX); - emit(JNZ); - emit32(prologueSize - codePos - 4); - emitByte(JMP); - emit32(epilogueOffset - codePos - 4); - } - - void JitCompilerX86::generateCode(Instruction& instr, int i) { - instructionOffsets.push_back(codePos); - auto generator = engine[instr.opcode]; - (this->*generator)(instr, i); - } - - void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { - switch ((SuperscalarInstructionType)instr.opcode) - { - case randomx::SuperscalarInstructionType::ISUB_R: - emit(REX_SUB_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - break; - case randomx::SuperscalarInstructionType::IXOR_R: - emit(REX_XOR_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - break; - case randomx::SuperscalarInstructionType::IADD_RS: - emit(REX_LEA); - emitByte(0x04 + 8 * instr.dst); - genSIB(instr.getModShift(), instr.src, instr.dst); - break; - case randomx::SuperscalarInstructionType::IMUL_R: - emit(REX_IMUL_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - break; - case randomx::SuperscalarInstructionType::IROR_C: - emit(REX_ROT_I8); - emitByte(0xc8 + instr.dst); - emitByte(instr.getImm32() & 63); - break; - case randomx::SuperscalarInstructionType::IADD_C7: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); - break; - case randomx::SuperscalarInstructionType::IXOR_C7: - emit(REX_XOR_RI); - emitByte(0xf0 + 
instr.dst); - emit32(instr.getImm32()); - break; - case randomx::SuperscalarInstructionType::IADD_C8: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); -#ifdef RANDOMX_ALIGN - emit(NOP1); -#endif - break; - case randomx::SuperscalarInstructionType::IXOR_C8: - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); -#ifdef RANDOMX_ALIGN - emit(NOP1); -#endif - break; - case randomx::SuperscalarInstructionType::IADD_C9: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); -#ifdef RANDOMX_ALIGN - emit(NOP2); -#endif - break; - case randomx::SuperscalarInstructionType::IXOR_C9: - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); -#ifdef RANDOMX_ALIGN - emit(NOP2); -#endif - break; - case randomx::SuperscalarInstructionType::IMULH_R: - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe0 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - break; - case randomx::SuperscalarInstructionType::ISMULH_R: - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe8 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - break; - case randomx::SuperscalarInstructionType::IMUL_RCP: - emit(MOV_RAX_I); - emit64(reciprocalCache[instr.getImm32()]); - emit(REX_IMUL_RM); - emitByte(0xc0 + 8 * instr.dst); - break; - default: - UNREACHABLE; - } - } - - void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { - emit(LEA_32); - emitByte(0x80 + instr.src + (rax ? 0 : 8)); - if (instr.src == RegisterNeedsSib) { - emitByte(0x24); - } - emit32(instr.getImm32()); - if (rax) - emitByte(AND_EAX_I); - else - emit(AND_ECX_I); - emit32(instr.getModMem() ? 
ScratchpadL1Mask : ScratchpadL2Mask); - } - - void JitCompilerX86::genAddressRegDst(Instruction& instr) { - emit(LEA_32); - emitByte(0x80 + instr.dst); - if (instr.dst == RegisterNeedsSib) { - emitByte(0x24); - } - emit32(instr.getImm32()); - emitByte(AND_EAX_I); - if (instr.getModCond() < StoreL3Condition) { - emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - } - else { - emit32(ScratchpadL3Mask); - } - } - - void JitCompilerX86::genAddressImm(Instruction& instr) { - emit32(instr.getImm32() & ScratchpadL3Mask); - } - - void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - emit(REX_LEA); - if (instr.dst == RegisterNeedsDisplacement) - emitByte(0xac); - else - emitByte(0x04 + 8 * instr.dst); - genSIB(instr.getModShift(), instr.src, instr.dst); - if (instr.dst == RegisterNeedsDisplacement) - emit32(instr.getImm32()); - } - - void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_ADD_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); - } - else { - emit(REX_ADD_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); - } - } - - void JitCompilerX86::genSIB(int scale, int index, int base) { - emitByte((scale << 6) | (index << 3) | base); - } - - void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - emit(REX_SUB_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - } - else { - emit(REX_81); - emitByte(0xe8 + instr.dst); - emit32(instr.getImm32()); - } - } - - void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_SUB_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); - } - else { - emit(REX_SUB_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); - } - } - - void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) 
{ - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - emit(REX_IMUL_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - } - else { - emit(REX_IMUL_RRI); - emitByte(0xc0 + 9 * instr.dst); - emit32(instr.getImm32()); - } - } - - void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_IMUL_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); - } - else { - emit(REX_IMUL_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); - } - } - - void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe0 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - } - - void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr, false); - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_MEM); - } - else { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_M); - emitByte(0xa6); - genAddressImm(instr); - } - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - } - - void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe8 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - } - - void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr, false); - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_IMUL_MEM); - } - else { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_M); - emitByte(0xae); - genAddressImm(instr); - } - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); - } - - void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { - uint64_t 
divisor = instr.getImm32(); - if (!isZeroOrPowerOf2(divisor)) { - registerUsage[instr.dst] = i; - emit(MOV_RAX_I); - emit64(randomx_reciprocal_fast(divisor)); - emit(REX_IMUL_RM); - emitByte(0xc0 + 8 * instr.dst); - } - } - - void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - emit(REX_NEG); - emitByte(0xd8 + instr.dst); - } - - void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - emit(REX_XOR_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); - } - else { - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); - } - } - - void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_XOR_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); - } - else { - emit(REX_XOR_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); - } - } - - void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - emit(REX_MOV_RR); - emitByte(0xc8 + instr.src); - emit(REX_ROT_CL); - emitByte(0xc8 + instr.dst); - } - else { - emit(REX_ROT_I8); - emitByte(0xc8 + instr.dst); - emitByte(instr.getImm32() & 63); - } - } - - void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - if (instr.src != instr.dst) { - emit(REX_MOV_RR); - emitByte(0xc8 + instr.src); - emit(REX_ROT_CL); - emitByte(0xc0 + instr.dst); - } - else { - emit(REX_ROT_I8); - emitByte(0xc0 + instr.dst); - emitByte(instr.getImm32() & 63); - } - } - - void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { - if (instr.src != instr.dst) { - registerUsage[instr.dst] = i; - registerUsage[instr.src] = i; - emit(REX_XCHG); - emitByte(0xc0 + instr.src + 8 * instr.dst); - } - } - - void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { - emit(SHUFPD); - emitByte(0xc0 + 9 * instr.dst); - 
emitByte(1); - } - - void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - emit(REX_ADDPD); - emitByte(0xc0 + instr.src + 8 * instr.dst); - } - - void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_ADDPD); - emitByte(0xc4 + 8 * instr.dst); - } - - void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - emit(REX_SUBPD); - emitByte(0xc0 + instr.src + 8 * instr.dst); - } - - void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_SUBPD); - emitByte(0xc4 + 8 * instr.dst); - } - - void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - emit(REX_XORPS); - emitByte(0xc7 + 8 * instr.dst); - } - - void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - instr.src %= RegisterCountFlt; - emit(REX_MULPD); - emitByte(0xe0 + instr.src + 8 * instr.dst); - } - - void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_ANDPS_XMM12); - emit(REX_DIVPD); - emitByte(0xe4 + 8 * instr.dst); - } - - void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { - instr.dst %= RegisterCountFlt; - emit(SQRTPD); - emitByte(0xe4 + 9 * instr.dst); - } - - void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.src); - int rotate = (13 - (instr.getImm32() & 63)) & 63; - if (rotate != 0) { - emit(ROL_RAX); - emitByte(rotate); - } - emit(AND_OR_MOV_LDMXCSR); - } - - void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { - int reg = instr.dst; - int target = registerUsage[reg] + 1; - emit(REX_ADD_I); - emitByte(0xc0 + reg); - int shift 
= instr.getModCond() + ConditionOffset; - uint32_t imm = instr.getImm32() | (1UL << shift); - if (ConditionOffset > 0 || shift > 0) - imm &= ~(1UL << (shift - 1)); - emit32(imm); - emit(REX_TEST); - emitByte(0xc0 + reg); - emit32(ConditionMask << shift); - emit(JZ); - emit32(instructionOffsets[target] - (codePos + 4)); - //mark all registers as used - for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j] = i; - } - } - - void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { - genAddressRegDst(instr); - emit(REX_MOV_MR); - emitByte(0x04 + 8 * instr.src); - emitByte(0x06); - } - - void JitCompilerX86::h_NOP(Instruction& instr, int i) { - emit(NOP1); - } - -#include "instruction_weights.hpp" -#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x)) - - InstructionGeneratorX86 JitCompilerX86::engine[256] = { - INST_HANDLE(IADD_RS) - INST_HANDLE(IADD_M) - INST_HANDLE(ISUB_R) - INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_R) - INST_HANDLE(IMUL_M) - INST_HANDLE(IMULH_R) - INST_HANDLE(IMULH_M) - INST_HANDLE(ISMULH_R) - INST_HANDLE(ISMULH_M) - INST_HANDLE(IMUL_RCP) - INST_HANDLE(INEG_R) - INST_HANDLE(IXOR_R) - INST_HANDLE(IXOR_M) - INST_HANDLE(IROR_R) - INST_HANDLE(IROL_R) - INST_HANDLE(ISWAP_R) - INST_HANDLE(FSWAP_R) - INST_HANDLE(FADD_R) - INST_HANDLE(FADD_M) - INST_HANDLE(FSUB_R) - INST_HANDLE(FSUB_M) - INST_HANDLE(FSCAL_R) - INST_HANDLE(FMUL_R) - INST_HANDLE(FDIV_M) - INST_HANDLE(FSQRT_R) - INST_HANDLE(CBRANCH) - INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(NOP) - }; - -} diff --git a/external/src/randomx/src/jit_compiler_x86.hpp b/external/src/randomx/src/jit_compiler_x86.hpp deleted file mode 100644 index e95685f..0000000 --- a/external/src/randomx/src/jit_compiler_x86.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include "common.hpp" - -namespace randomx { - - class Program; - struct ProgramConfiguration; - class SuperscalarProgram; - class JitCompilerX86; - class Instruction; - - typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); - - class JitCompilerX86 { - public: - JitCompilerX86(); - ~JitCompilerX86(); - void generateProgram(Program&, ProgramConfiguration&); - void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); - template - void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); - void generateDatasetInitCode(); - ProgramFunc* getProgramFunc() { - return (ProgramFunc*)code; - } - DatasetInitFunc* getDatasetInitFunc() { - return (DatasetInitFunc*)code; - } - uint8_t* getCode() { - return code; - } - size_t getCodeSize(); - void enableWriting(); - void enableExecution(); - void enableAll(); - private: - static InstructionGeneratorX86 engine[256]; - std::vector instructionOffsets; - int registerUsage[RegistersCount]; - uint8_t* code; - int32_t codePos; - - void generateProgramPrologue(Program&, ProgramConfiguration&); - void generateProgramEpilogue(Program&, ProgramConfiguration&); - void genAddressReg(Instruction&, bool); - void genAddressRegDst(Instruction&); - void genAddressImm(Instruction&); - void genSIB(int scale, int index, int base); - - void generateCode(Instruction&, int); - void generateSuperscalarCode(Instruction &, std::vector &); - - void emitByte(uint8_t val) { - code[codePos] = val; - codePos++; - } - - void emit32(uint32_t val) { - memcpy(code + codePos, &val, sizeof val); - codePos += sizeof val; - } - - void emit64(uint64_t val) { - memcpy(code + codePos, &val, sizeof val); - codePos += sizeof val; - } - - template - void emit(const uint8_t (&src)[N]) { - emit(src, N); - } - - void emit(const uint8_t* src, size_t count) { - memcpy(code + codePos, src, count); - codePos += count; - } - - void h_IADD_RS(Instruction&, int); - void 
h_IADD_M(Instruction&, int); - void h_ISUB_R(Instruction&, int); - void h_ISUB_M(Instruction&, int); - void h_IMUL_R(Instruction&, int); - void h_IMUL_M(Instruction&, int); - void h_IMULH_R(Instruction&, int); - void h_IMULH_M(Instruction&, int); - void h_ISMULH_R(Instruction&, int); - void h_ISMULH_M(Instruction&, int); - void h_IMUL_RCP(Instruction&, int); - void h_INEG_R(Instruction&, int); - void h_IXOR_R(Instruction&, int); - void h_IXOR_M(Instruction&, int); - void h_IROR_R(Instruction&, int); - void h_IROL_R(Instruction&, int); - void h_ISWAP_R(Instruction&, int); - void h_FSWAP_R(Instruction&, int); - void h_FADD_R(Instruction&, int); - void h_FADD_M(Instruction&, int); - void h_FSUB_R(Instruction&, int); - void h_FSUB_M(Instruction&, int); - void h_FSCAL_R(Instruction&, int); - void h_FMUL_R(Instruction&, int); - void h_FDIV_M(Instruction&, int); - void h_FSQRT_R(Instruction&, int); - void h_CBRANCH(Instruction&, int); - void h_CFROUND(Instruction&, int); - void h_ISTORE(Instruction&, int); - void h_NOP(Instruction&, int); - }; - -} \ No newline at end of file diff --git a/external/src/randomx/src/jit_compiler_x86_static.S b/external/src/randomx/src/jit_compiler_x86_static.S deleted file mode 100644 index 6757c58..0000000 --- a/external/src/randomx/src/jit_compiler_x86_static.S +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright (c) 2018-2019, tevador -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of the copyright holder nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -.intel_syntax noprefix -#if defined(__APPLE__) -.text -#define DECL(x) _##x -#else -.section .text -#define DECL(x) x -#endif - -#if defined(__WIN32__) || defined(__CYGWIN__) -#define WINABI -#endif - -.global DECL(randomx_prefetch_scratchpad) -.global DECL(randomx_prefetch_scratchpad_end) -.global DECL(randomx_program_prologue) -.global DECL(randomx_program_loop_begin) -.global DECL(randomx_program_loop_load) -.global DECL(randomx_program_start) -.global DECL(randomx_program_read_dataset) -.global DECL(randomx_program_read_dataset_sshash_init) -.global DECL(randomx_program_read_dataset_sshash_fin) -.global DECL(randomx_program_loop_store) -.global DECL(randomx_program_loop_end) -.global DECL(randomx_dataset_init) -.global DECL(randomx_program_epilogue) -.global DECL(randomx_sshash_load) -.global DECL(randomx_sshash_prefetch) -.global DECL(randomx_sshash_end) -.global DECL(randomx_sshash_init) -.global DECL(randomx_program_end) -.global DECL(randomx_reciprocal_fast) - -#include 
"configuration.h" - -#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64) -#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64) -#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1) -#define RANDOMX_ALIGN 4096 -#define SUPERSCALAR_OFFSET ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN)) - -#define db .byte - -DECL(randomx_prefetch_scratchpad): - mov rdx, rax - and eax, RANDOMX_SCRATCHPAD_MASK - prefetcht0 [rsi+rax] - ror rdx, 32 - and edx, RANDOMX_SCRATCHPAD_MASK - prefetcht0 [rsi+rdx] - -DECL(randomx_prefetch_scratchpad_end): - -.balign 64 -DECL(randomx_program_prologue): -#if defined(WINABI) - #include "asm/program_prologue_win64.inc" -#else - #include "asm/program_prologue_linux.inc" -#endif - movapd xmm13, xmmword ptr [mantissaMask+rip] - movapd xmm14, xmmword ptr [exp240+rip] - movapd xmm15, xmmword ptr [scaleMask+rip] - mov rdx, rax - and eax, RANDOMX_SCRATCHPAD_MASK - ror rdx, 32 - and edx, RANDOMX_SCRATCHPAD_MASK - jmp DECL(randomx_program_loop_begin) - -.balign 64 - #include "asm/program_xmm_constants.inc" - -.balign 64 -DECL(randomx_program_loop_begin): - nop - -DECL(randomx_program_loop_load): - #include "asm/program_loop_load.inc" - -DECL(randomx_program_start): - nop - -DECL(randomx_program_read_dataset): - #include "asm/program_read_dataset.inc" - -DECL(randomx_program_read_dataset_sshash_init): - #include "asm/program_read_dataset_sshash_init.inc" - -DECL(randomx_program_read_dataset_sshash_fin): - #include "asm/program_read_dataset_sshash_fin.inc" - -DECL(randomx_program_loop_store): - #include "asm/program_loop_store.inc" - -DECL(randomx_program_loop_end): - nop - -.balign 64 -DECL(randomx_dataset_init): - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 -#if defined(WINABI) - push rdi - push rsi - mov rdi, qword ptr [rcx] ;# cache->memory - mov rsi, rdx ;# dataset - mov rbp, r8 ;# block index - push r9 ;# max. 
block index -#else - mov rdi, qword ptr [rdi] ;# cache->memory - ;# dataset in rsi - mov rbp, rdx ;# block index - push rcx ;# max. block index -#endif -init_block_loop: - prefetchw byte ptr [rsi] - mov rbx, rbp - .byte 232 ;# 0xE8 = call - .int SUPERSCALAR_OFFSET - (call_offset - DECL(randomx_dataset_init)) -call_offset: - mov qword ptr [rsi+0], r8 - mov qword ptr [rsi+8], r9 - mov qword ptr [rsi+16], r10 - mov qword ptr [rsi+24], r11 - mov qword ptr [rsi+32], r12 - mov qword ptr [rsi+40], r13 - mov qword ptr [rsi+48], r14 - mov qword ptr [rsi+56], r15 - add rbp, 1 - add rsi, 64 - cmp rbp, qword ptr [rsp] - jb init_block_loop - pop rax -#if defined(WINABI) - pop rsi - pop rdi -#endif - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - ret - -.balign 64 -DECL(randomx_program_epilogue): - #include "asm/program_epilogue_store.inc" -#if defined(WINABI) - #include "asm/program_epilogue_win64.inc" -#else - #include "asm/program_epilogue_linux.inc" -#endif - -.balign 64 -DECL(randomx_sshash_load): - #include "asm/program_sshash_load.inc" - -DECL(randomx_sshash_prefetch): - #include "asm/program_sshash_prefetch.inc" - -DECL(randomx_sshash_end): - nop - -.balign 64 -DECL(randomx_sshash_init): - lea r8, [rbx+1] - #include "asm/program_sshash_prefetch.inc" - imul r8, qword ptr [r0_mul+rip] - mov r9, qword ptr [r1_add+rip] - xor r9, r8 - mov r10, qword ptr [r2_add+rip] - xor r10, r8 - mov r11, qword ptr [r3_add+rip] - xor r11, r8 - mov r12, qword ptr [r4_add+rip] - xor r12, r8 - mov r13, qword ptr [r5_add+rip] - xor r13, r8 - mov r14, qword ptr [r6_add+rip] - xor r14, r8 - mov r15, qword ptr [r7_add+rip] - xor r15, r8 - jmp DECL(randomx_program_end) - -.balign 64 - #include "asm/program_sshash_constants.inc" - -.balign 64 -DECL(randomx_program_end): - nop - -DECL(randomx_reciprocal_fast): -#if !defined(WINABI) - mov rcx, rdi -#endif - #include "asm/randomx_reciprocal.inc" diff --git a/external/src/randomx/src/jit_compiler_x86_static.asm 
b/external/src/randomx/src/jit_compiler_x86_static.asm deleted file mode 100644 index 2d6150e..0000000 --- a/external/src/randomx/src/jit_compiler_x86_static.asm +++ /dev/null @@ -1,221 +0,0 @@ -; Copyright (c) 2018-2019, tevador -; -; All rights reserved. -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions are met: -; * Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; * Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; * Neither the name of the copyright holder nor the -; names of its contributors may be used to endorse or promote products -; derived from this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -IFDEF RAX - -_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE - -PUBLIC randomx_prefetch_scratchpad -PUBLIC randomx_prefetch_scratchpad_end -PUBLIC randomx_program_prologue -PUBLIC randomx_program_loop_begin -PUBLIC randomx_program_loop_load -PUBLIC randomx_program_start -PUBLIC randomx_program_read_dataset -PUBLIC randomx_program_read_dataset_sshash_init -PUBLIC randomx_program_read_dataset_sshash_fin -PUBLIC randomx_dataset_init -PUBLIC randomx_program_loop_store -PUBLIC randomx_program_loop_end -PUBLIC randomx_program_epilogue -PUBLIC randomx_sshash_load -PUBLIC randomx_sshash_prefetch -PUBLIC randomx_sshash_end -PUBLIC randomx_sshash_init -PUBLIC randomx_program_end -PUBLIC randomx_reciprocal_fast - -include asm/configuration.asm - -RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64) -RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64) -RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1) -RANDOMX_ALIGN EQU 4096 -SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN)) - -randomx_prefetch_scratchpad PROC - mov rdx, rax - and eax, RANDOMX_SCRATCHPAD_MASK - prefetcht0 [rsi+rax] - ror rdx, 32 - and edx, RANDOMX_SCRATCHPAD_MASK - prefetcht0 [rsi+rdx] -randomx_prefetch_scratchpad ENDP - -randomx_prefetch_scratchpad_end PROC -randomx_prefetch_scratchpad_end ENDP - -ALIGN 64 -randomx_program_prologue PROC - include asm/program_prologue_win64.inc - movapd xmm13, xmmword ptr [mantissaMask] - movapd xmm14, xmmword ptr [exp240] - movapd xmm15, xmmword ptr [scaleMask] - mov rdx, rax - and eax, RANDOMX_SCRATCHPAD_MASK - ror rdx, 32 - and edx, RANDOMX_SCRATCHPAD_MASK - jmp randomx_program_loop_begin -randomx_program_prologue ENDP - -ALIGN 64 - include asm/program_xmm_constants.inc - -ALIGN 64 -randomx_program_loop_begin PROC - nop -randomx_program_loop_begin ENDP - -randomx_program_loop_load PROC - include asm/program_loop_load.inc -randomx_program_loop_load ENDP - -randomx_program_start PROC - nop 
-randomx_program_start ENDP - -randomx_program_read_dataset PROC - include asm/program_read_dataset.inc -randomx_program_read_dataset ENDP - -randomx_program_read_dataset_sshash_init PROC - include asm/program_read_dataset_sshash_init.inc -randomx_program_read_dataset_sshash_init ENDP - -randomx_program_read_dataset_sshash_fin PROC - include asm/program_read_dataset_sshash_fin.inc -randomx_program_read_dataset_sshash_fin ENDP - -randomx_program_loop_store PROC - include asm/program_loop_store.inc -randomx_program_loop_store ENDP - -randomx_program_loop_end PROC - nop -randomx_program_loop_end ENDP - -ALIGN 64 -randomx_dataset_init PROC - push rbx - push rbp - push rdi - push rsi - push r12 - push r13 - push r14 - push r15 - mov rdi, qword ptr [rcx] ;# cache->memory - mov rsi, rdx ;# dataset - mov rbp, r8 ;# block index - push r9 ;# max. block index -init_block_loop: - prefetchw byte ptr [rsi] - mov rbx, rbp - db 232 ;# 0xE8 = call - dd SUPERSCALAR_OFFSET - distance - distance equ $ - offset randomx_dataset_init - mov qword ptr [rsi+0], r8 - mov qword ptr [rsi+8], r9 - mov qword ptr [rsi+16], r10 - mov qword ptr [rsi+24], r11 - mov qword ptr [rsi+32], r12 - mov qword ptr [rsi+40], r13 - mov qword ptr [rsi+48], r14 - mov qword ptr [rsi+56], r15 - add rbp, 1 - add rsi, 64 - cmp rbp, qword ptr [rsp] - jb init_block_loop - pop r9 - pop r15 - pop r14 - pop r13 - pop r12 - pop rsi - pop rdi - pop rbp - pop rbx - ret -randomx_dataset_init ENDP - -ALIGN 64 -randomx_program_epilogue PROC - include asm/program_epilogue_store.inc - include asm/program_epilogue_win64.inc -randomx_program_epilogue ENDP - -ALIGN 64 -randomx_sshash_load PROC - include asm/program_sshash_load.inc -randomx_sshash_load ENDP - -randomx_sshash_prefetch PROC - include asm/program_sshash_prefetch.inc -randomx_sshash_prefetch ENDP - -randomx_sshash_end PROC - nop -randomx_sshash_end ENDP - -ALIGN 64 -randomx_sshash_init PROC - lea r8, [rbx+1] - include asm/program_sshash_prefetch.inc - imul r8, qword ptr 
[r0_mul] - mov r9, qword ptr [r1_add] - xor r9, r8 - mov r10, qword ptr [r2_add] - xor r10, r8 - mov r11, qword ptr [r3_add] - xor r11, r8 - mov r12, qword ptr [r4_add] - xor r12, r8 - mov r13, qword ptr [r5_add] - xor r13, r8 - mov r14, qword ptr [r6_add] - xor r14, r8 - mov r15, qword ptr [r7_add] - xor r15, r8 - jmp randomx_program_end -randomx_sshash_init ENDP - -ALIGN 64 - include asm/program_sshash_constants.inc - -ALIGN 64 -randomx_program_end PROC - nop -randomx_program_end ENDP - -randomx_reciprocal_fast PROC - include asm/randomx_reciprocal.inc -randomx_reciprocal_fast ENDP - -_RANDOMX_JITX86_STATIC ENDS - -ENDIF - -END diff --git a/external/src/randomx/src/jit_compiler_x86_static.hpp b/external/src/randomx/src/jit_compiler_x86_static.hpp deleted file mode 100644 index fe32a8b..0000000 --- a/external/src/randomx/src/jit_compiler_x86_static.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -extern "C" { - void randomx_prefetch_scratchpad(); - void randomx_prefetch_scratchpad_end(); - void randomx_program_prologue(); - void randomx_program_loop_begin(); - void randomx_program_loop_load(); - void randomx_program_start(); - void randomx_program_read_dataset(); - void randomx_program_read_dataset_sshash_init(); - void randomx_program_read_dataset_sshash_fin(); - void randomx_program_loop_store(); - void randomx_program_loop_end(); - void randomx_dataset_init(); - void randomx_program_epilogue(); - void randomx_sshash_load(); - void randomx_sshash_prefetch(); - void randomx_sshash_end(); - void randomx_sshash_init(); - void randomx_program_end(); -} diff --git a/external/src/randomx/src/program.hpp b/external/src/randomx/src/program.hpp deleted file mode 100644 index d0f6805..0000000 --- a/external/src/randomx/src/program.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include -#include "common.hpp" -#include "instruction.hpp" -#include "blake2/endian.h" - -namespace randomx { - - struct ProgramConfiguration { - uint64_t eMask[2]; - uint32_t readReg0, readReg1, readReg2, readReg3; - }; - - class Program { - public: - Instruction& operator()(int pc) { - return programBuffer[pc]; - } - friend std::ostream& operator<<(std::ostream& os, const Program& p) { - p.print(os); - return os; - } - uint64_t getEntropy(int i) { - return load64(&entropyBuffer[i]); - } - uint32_t getSize() { - return RANDOMX_PROGRAM_SIZE; - } - private: - void print(std::ostream& os) const { - for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { - auto instr = programBuffer[i]; - os << instr; - } - } - uint64_t entropyBuffer[16]; - Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; - }; - - static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program"); -} diff --git a/external/src/randomx/src/randomx.cpp b/external/src/randomx/src/randomx.cpp deleted file mode 100644 
index 7d239f6..0000000 --- a/external/src/randomx/src/randomx.cpp +++ /dev/null @@ -1,397 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "randomx.h" -#include "dataset.hpp" -#include "vm_interpreted.hpp" -#include "vm_interpreted_light.hpp" -#include "vm_compiled.hpp" -#include "vm_compiled_light.hpp" -#include "blake2/blake2.h" -#include "cpu.hpp" -#include -#include -#include - -extern "C" { - - randomx_flags randomx_get_flags() { - randomx_flags flags = RANDOMX_HAVE_COMPILER ? 
RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT; - randomx::Cpu cpu; -#ifdef RANDOMX_FORCE_SECURE - if (flags == RANDOMX_FLAG_JIT) { - flags |= RANDOMX_FLAG_SECURE; - } -#endif - if (HAVE_AES && cpu.hasAes()) { - flags |= RANDOMX_FLAG_HARD_AES; - } - if (randomx_argon2_impl_avx2() != nullptr && cpu.hasAvx2()) { - flags |= RANDOMX_FLAG_ARGON2_AVX2; - } - if (randomx_argon2_impl_ssse3() != nullptr && cpu.hasSsse3()) { - flags |= RANDOMX_FLAG_ARGON2_SSSE3; - } - return flags; - } - - randomx_cache *randomx_alloc_cache(randomx_flags flags) { - randomx_cache *cache = nullptr; - auto impl = randomx::selectArgonImpl(flags); - if (impl == nullptr) { - return cache; - } - - try { - cache = new randomx_cache(); - cache->argonImpl = impl; - switch ((int)(flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES))) { - case RANDOMX_FLAG_DEFAULT: - cache->dealloc = &randomx::deallocCache; - cache->jit = nullptr; - cache->initialize = &randomx::initCache; - cache->datasetInit = &randomx::initDataset; - cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); - break; - - case RANDOMX_FLAG_JIT: - cache->dealloc = &randomx::deallocCache; - cache->jit = new randomx::JitCompiler(); - cache->initialize = &randomx::initCacheCompile; - cache->datasetInit = cache->jit->getDatasetInitFunc(); - cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); - break; - - case RANDOMX_FLAG_LARGE_PAGES: - cache->dealloc = &randomx::deallocCache; - cache->jit = nullptr; - cache->initialize = &randomx::initCache; - cache->datasetInit = &randomx::initDataset; - cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); - break; - - case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: - cache->dealloc = &randomx::deallocCache; - cache->jit = new randomx::JitCompiler(); - cache->initialize = &randomx::initCacheCompile; - cache->datasetInit = cache->jit->getDatasetInitFunc(); - cache->memory = 
(uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); - break; - - default: - UNREACHABLE; - } - } - catch (std::exception &ex) { - if (cache != nullptr) { - randomx_release_cache(cache); - cache = nullptr; - } - } - - return cache; - } - - void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) { - assert(cache != nullptr); - assert(keySize == 0 || key != nullptr); - std::string cacheKey; - cacheKey.assign((const char *)key, keySize); - if (cache->cacheKey != cacheKey || !cache->isInitialized()) { - cache->initialize(cache, key, keySize); - cache->cacheKey = cacheKey; - } - } - - void randomx_release_cache(randomx_cache* cache) { - assert(cache != nullptr); - if (cache->memory != nullptr) { - cache->dealloc(cache); - } - delete cache; - } - - randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { - - //fail on 32-bit systems if DatasetSize is >= 4 GiB - if (randomx::DatasetSize > std::numeric_limits::max()) { - return nullptr; - } - - randomx_dataset *dataset = nullptr; - - try { - dataset = new randomx_dataset(); - if (flags & RANDOMX_FLAG_LARGE_PAGES) { - dataset->dealloc = &randomx::deallocDataset; - dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize); - } - else { - dataset->dealloc = &randomx::deallocDataset; - dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize); - } - } - catch (std::exception &ex) { - if (dataset != nullptr) { - randomx_release_dataset(dataset); - dataset = nullptr; - } - } - - return dataset; - } - - constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; - - unsigned long randomx_dataset_item_count() { - return DatasetItemCount; - } - - void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { - assert(dataset != nullptr); - assert(cache != nullptr); - assert(startItem < DatasetItemCount && itemCount <= 
DatasetItemCount); - assert(startItem + itemCount <= DatasetItemCount); - cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount); - } - - void *randomx_get_dataset_memory(randomx_dataset *dataset) { - assert(dataset != nullptr); - return dataset->memory; - } - - void randomx_release_dataset(randomx_dataset *dataset) { - assert(dataset != nullptr); - dataset->dealloc(dataset); - delete dataset; - } - - randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) { - assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM)); - assert(cache == nullptr || cache->isInitialized()); - assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM)); - - randomx_vm *vm = nullptr; - - try { - switch ((int)(flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES))) { - case RANDOMX_FLAG_DEFAULT: - vm = new randomx::InterpretedLightVmDefault(); - break; - - case RANDOMX_FLAG_FULL_MEM: - vm = new randomx::InterpretedVmDefault(); - break; - - case RANDOMX_FLAG_JIT: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledLightVmDefaultSecure(); - } - else { - vm = new randomx::CompiledLightVmDefault(); - } - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledVmDefaultSecure(); - } - else { - vm = new randomx::CompiledVmDefault(); - } - break; - - case RANDOMX_FLAG_HARD_AES: - vm = new randomx::InterpretedLightVmHardAes(); - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES: - vm = new randomx::InterpretedVmHardAes(); - break; - - case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledLightVmHardAesSecure(); - } - else { - vm = new randomx::CompiledLightVmHardAes(); - } - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: - if (flags & RANDOMX_FLAG_SECURE) { - vm 
= new randomx::CompiledVmHardAesSecure(); - } - else { - vm = new randomx::CompiledVmHardAes(); - } - break; - - case RANDOMX_FLAG_LARGE_PAGES: - vm = new randomx::InterpretedLightVmLargePage(); - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES: - vm = new randomx::InterpretedVmLargePage(); - break; - - case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledLightVmLargePageSecure(); - } - else { - vm = new randomx::CompiledLightVmLargePage(); - } - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledVmLargePageSecure(); - } - else { - vm = new randomx::CompiledVmLargePage(); - } - break; - - case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: - vm = new randomx::InterpretedLightVmLargePageHardAes(); - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: - vm = new randomx::InterpretedVmLargePageHardAes(); - break; - - case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledLightVmLargePageHardAesSecure(); - } - else { - vm = new randomx::CompiledLightVmLargePageHardAes(); - } - break; - - case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES: - if (flags & RANDOMX_FLAG_SECURE) { - vm = new randomx::CompiledVmLargePageHardAesSecure(); - } - else { - vm = new randomx::CompiledVmLargePageHardAes(); - } - break; - - default: - UNREACHABLE; - } - - if(cache != nullptr) { - vm->setCache(cache); - vm->cacheKey = cache->cacheKey; - } - - if(dataset != nullptr) - vm->setDataset(dataset); - - vm->allocate(); - } - catch (std::exception &ex) { - delete vm; - vm = nullptr; - } - - return vm; - } - - void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) { - assert(machine != nullptr); - assert(cache != nullptr && 
cache->isInitialized()); - if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) { - machine->setCache(cache); - machine->cacheKey = cache->cacheKey; - } - } - - void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) { - assert(machine != nullptr); - assert(dataset != nullptr); - machine->setDataset(dataset); - } - - void randomx_destroy_vm(randomx_vm *machine) { - assert(machine != nullptr); - delete machine; - } - - void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) { - assert(machine != nullptr); - assert(inputSize == 0 || input != nullptr); - assert(output != nullptr); - fenv_t fpstate; - fegetenv(&fpstate); - alignas(16) uint64_t tempHash[8]; - int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); - assert(blakeResult == 0); - machine->initScratchpad(&tempHash); - machine->resetRoundingMode(); - for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - machine->run(&tempHash); - blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - assert(blakeResult == 0); - } - machine->run(&tempHash); - machine->getFinalResult(output, RANDOMX_HASH_SIZE); - fesetenv(&fpstate); - } - - void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) { - blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0); - machine->initScratchpad(machine->tempHash); - } - - void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) { - machine->resetRoundingMode(); - for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - machine->run(machine->tempHash); - blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - } - machine->run(machine->tempHash); - - // Finish current hash and fill the 
scratchpad for the next hash at the same time - blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0); - machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash); - } - - void randomx_calculate_hash_last(randomx_vm* machine, void* output) { - machine->resetRoundingMode(); - for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - machine->run(machine->tempHash); - blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - } - machine->run(machine->tempHash); - machine->getFinalResult(output, RANDOMX_HASH_SIZE); - } -} diff --git a/external/src/randomx/src/randomx.h b/external/src/randomx/src/randomx.h deleted file mode 100644 index 1811857..0000000 --- a/external/src/randomx/src/randomx.h +++ /dev/null @@ -1,273 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef RANDOMX_H -#define RANDOMX_H - -#include -#include - -#define RANDOMX_HASH_SIZE 32 -#define RANDOMX_DATASET_ITEM_SIZE 64 - -#ifndef RANDOMX_EXPORT -#define RANDOMX_EXPORT -#endif - -typedef enum { - RANDOMX_FLAG_DEFAULT = 0, - RANDOMX_FLAG_LARGE_PAGES = 1, - RANDOMX_FLAG_HARD_AES = 2, - RANDOMX_FLAG_FULL_MEM = 4, - RANDOMX_FLAG_JIT = 8, - RANDOMX_FLAG_SECURE = 16, - RANDOMX_FLAG_ARGON2_SSSE3 = 32, - RANDOMX_FLAG_ARGON2_AVX2 = 64, - RANDOMX_FLAG_ARGON2 = 96 -} randomx_flags; - -#if defined(__cplusplus) -struct randomx_dataset; -struct randomx_cache; -class randomx_vm; -#else -typedef struct randomx_dataset randomx_dataset; -typedef struct randomx_cache randomx_cache; -typedef struct randomx_vm randomx_vm; -#endif - - -#if defined(__cplusplus) - -#ifdef __cpp_constexpr -#define CONSTEXPR constexpr -#else -#define CONSTEXPR -#endif - -inline CONSTEXPR randomx_flags operator |(randomx_flags a, randomx_flags b) { - return static_cast(static_cast(a) | static_cast(b)); -} -inline CONSTEXPR randomx_flags operator &(randomx_flags a, randomx_flags b) { - return static_cast(static_cast(a) & static_cast(b)); -} -inline randomx_flags& operator |=(randomx_flags& a, randomx_flags b) { - return a = a | b; -} - -extern "C" { -#endif - -/** - * @return The recommended flags to be used on the current machine. - * Does not include: - * RANDOMX_FLAG_LARGE_PAGES - * RANDOMX_FLAG_FULL_MEM - * RANDOMX_FLAG_SECURE - * These flags must be added manually if desired. 
- * On OpenBSD RANDOMX_FLAG_SECURE is enabled by default in JIT mode as W^X is enforced by the OS. - */ -RANDOMX_EXPORT randomx_flags randomx_get_flags(void); - -/** - * Creates a randomx_cache structure and allocates memory for RandomX Cache. - * - * @param flags is any combination of these 2 flags (each flag can be set or not set): - * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages - * RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes - * subsequent Dataset initialization faster - * Optionally, one of these two flags may be selected: - * RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set - * makes subsequent cache initialization faster - * RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set - * makes subsequent cache initialization faster - * - * @return Pointer to an allocated randomx_cache structure. - * Returns NULL if: - * (1) memory allocation fails - * (2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform - * (3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set - */ -RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags); - -/** - * Initializes the cache memory and SuperscalarHash using the provided key value. - * Does nothing if called again with the same key value. - * - * @param cache is a pointer to a previously allocated randomx_cache structure. Must not be NULL. - * @param key is a pointer to memory which contains the key value. Must not be NULL. - * @param keySize is the number of bytes of the key. -*/ -RANDOMX_EXPORT void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize); - -/** - * Releases all memory occupied by the randomx_cache structure. - * - * @param cache is a pointer to a previously allocated randomx_cache structure. 
-*/ -RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache); - -/** - * Creates a randomx_dataset structure and allocates memory for RandomX Dataset. - * - * @param flags is the initialization flags. Only one flag is supported (can be set or not set): - * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages - * - * @return Pointer to an allocated randomx_dataset structure. - * NULL is returned if memory allocation fails. - */ -RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags); - -/** - * Gets the number of items contained in the dataset. - * - * @return the number of items contained in the dataset. -*/ -RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void); - -/** - * Initializes dataset items. - * - * Note: In order to use the Dataset, all items from 0 to (randomx_dataset_item_count() - 1) must be initialized. - * This may be done by several calls to this function using non-overlapping item sequences. - * - * @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL. - * @param cache is a pointer to a previously allocated and initialized randomx_cache structure. Must not be NULL. - * @param startItem is the item number where intialization should start. - * @param itemCount is the number of items that should be initialized. -*/ -RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount); - -/** - * Returns a pointer to the internal memory buffer of the dataset structure. The size - * of the internal memory buffer is randomx_dataset_item_count() * RANDOMX_DATASET_ITEM_SIZE. - * - * @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL. - * - * @return Pointer to the internal memory buffer of the dataset structure. 
-*/ -RANDOMX_EXPORT void *randomx_get_dataset_memory(randomx_dataset *dataset); - -/** - * Releases all memory occupied by the randomx_dataset structure. - * - * @param dataset is a pointer to a previously allocated randomx_dataset structure. -*/ -RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset); - -/** - * Creates and initializes a RandomX virtual machine. - * - * @param flags is any combination of these 5 flags (each flag can be set or not set): - * RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages - * RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES - * RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset - * RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler - * RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never - * writable and executable at the same time (W^X policy) - * The numeric values of the first 4 flags are ordered so that a higher value will provide - * faster hash calculation and a lower numeric value will provide higher portability. - * Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest. - * @param cache is a pointer to an initialized randomx_cache structure. Can be - * NULL if RANDOMX_FLAG_FULL_MEM is set. - * @param dataset is a pointer to a randomx_dataset structure. Can be NULL - * if RANDOMX_FLAG_FULL_MEM is not set. - * - * @return Pointer to an initialized randomx_vm structure. - * Returns NULL if: - * (1) Scratchpad memory allocation fails. - * (2) The requested initialization flags are not supported on the current platform. - * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set - * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set -*/ -RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset); - -/** - * Reinitializes a virtual machine with a new Cache. 
This function should be called anytime - * the Cache is reinitialized with a new key. Does nothing if called with a Cache containing - * the same key value as already set. - * - * @param machine is a pointer to a randomx_vm structure that was initialized - * without RANDOMX_FLAG_FULL_MEM. Must not be NULL. - * @param cache is a pointer to an initialized randomx_cache structure. Must not be NULL. -*/ -RANDOMX_EXPORT void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); - -/** - * Reinitializes a virtual machine with a new Dataset. - * - * @param machine is a pointer to a randomx_vm structure that was initialized - * with RANDOMX_FLAG_FULL_MEM. Must not be NULL. - * @param dataset is a pointer to an initialized randomx_dataset structure. Must not be NULL. -*/ -RANDOMX_EXPORT void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset); - -/** - * Releases all memory occupied by the randomx_vm structure. - * - * @param machine is a pointer to a previously created randomx_vm structure. -*/ -RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine); - -/** - * Calculates a RandomX hash value. - * - * @param machine is a pointer to a randomx_vm structure. Must not be NULL. - * @param input is a pointer to memory to be hashed. Must not be NULL. - * @param inputSize is the number of bytes to be hashed. - * @param output is a pointer to memory where the hash will be stored. Must not - * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. -*/ -RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); - -/** - * Set of functions used to calculate multiple RandomX hashes more efficiently. - * randomx_calculate_hash_first will begin a hash calculation. - * randomx_calculate_hash_next will output the hash value of the previous input - * and begin the calculation of the next hash. - * randomx_calculate_hash_last will output the hash value of the previous input. 
- * - * WARNING: These functions may alter the floating point rounding mode of the calling thread. - * - * @param machine is a pointer to a randomx_vm structure. Must not be NULL. - * @param input is a pointer to memory to be hashed. Must not be NULL. - * @param inputSize is the number of bytes to be hashed. - * @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL. - * @param nextInputSize is the number of bytes to be hashed for the next hash. - * @param output is a pointer to memory where the hash will be stored. Must not - * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. -*/ -RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize); -RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output); -RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/external/src/randomx/src/reciprocal.c b/external/src/randomx/src/reciprocal.c deleted file mode 100644 index 22620f5..0000000 --- a/external/src/randomx/src/reciprocal.c +++ /dev/null @@ -1,80 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include "reciprocal.h" - -/* - Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64. - divisor must not be 0 or a power of 2 - - Equivalent x86 assembly (divisor in rcx): - - mov edx, 1 - mov r8, rcx - xor eax, eax - bsr rcx, rcx - shl rdx, cl - div r8 - ret - -*/ -uint64_t randomx_reciprocal(uint64_t divisor) { - - assert(divisor != 0); - - const uint64_t p2exp63 = 1ULL << 63; - - uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; - - unsigned bsr = 0; //highest set bit in divisor - - for (uint64_t bit = divisor; bit > 0; bit >>= 1) - bsr++; - - for (unsigned shift = 0; shift < bsr; shift++) { - if (remainder >= divisor - remainder) { - quotient = quotient * 2 + 1; - remainder = remainder * 2 - divisor; - } - else { - quotient = quotient * 2; - remainder = remainder * 2; - } - } - - return quotient; -} - -#if !RANDOMX_HAVE_FAST_RECIPROCAL - -uint64_t randomx_reciprocal_fast(uint64_t divisor) { - return randomx_reciprocal(divisor); -} - -#endif diff --git a/external/src/randomx/src/reciprocal.h b/external/src/randomx/src/reciprocal.h deleted file mode 100644 index 8858df2..0000000 --- a/external/src/randomx/src/reciprocal.h +++ /dev/null @@ -1,48 +0,0 
@@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include - -#if defined(_M_X64) || defined(__x86_64__) -#define RANDOMX_HAVE_FAST_RECIPROCAL 1 -#else -#define RANDOMX_HAVE_FAST_RECIPROCAL 0 -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -uint64_t randomx_reciprocal(uint64_t); -uint64_t randomx_reciprocal_fast(uint64_t); - -#if defined(__cplusplus) -} -#endif diff --git a/external/src/randomx/src/soft_aes.cpp b/external/src/randomx/src/soft_aes.cpp deleted file mode 100644 index 3e82fa2..0000000 --- a/external/src/randomx/src/soft_aes.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "soft_aes.h" - -alignas(16) const uint8_t sbox[256] = { - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 
0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, -}; - -alignas(16) const uint32_t lutEnc0[256] = { - 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, - 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, - 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, - 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, - 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, - 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, - 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, - 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, - 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, - 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, - 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, - 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, - 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, - 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, - 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, - 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, - 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, - 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, - 0xa06060c0, 
0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, - 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, - 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, - 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, - 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, - 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, - 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, - 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, - 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, - 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, - 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, - 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, - 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, - 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, -}; - -alignas(16) const uint32_t lutEnc1[256] = { - 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, - 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, - 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, - 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, - 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, - 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, - 
0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, - 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, - 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, - 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, - 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, - 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, - 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, - 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, - 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, - 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, - 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, - 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, - 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, - 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, - 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, - 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, - 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, - 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, - 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, - 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, - 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 
0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, - 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, - 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, - 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, - 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, - 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, -}; - -alignas(16) const uint32_t lutEnc2[256] = { - 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, - 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, - 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, - 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, - 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, - 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, - 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, - 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, - 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, - 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, - 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, - 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, - 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, - 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, - 0x51a2f351, 0xa35dfea3, 
0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, - 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, - 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, - 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, - 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, - 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, - 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, - 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, - 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, - 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, - 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, - 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, - 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, - 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, - 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, - 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, - 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, - 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, -}; - -alignas(16) const uint32_t lutEnc3[256] = { - 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, - 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, - 0x8f45caca, 
0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, - 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, - 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, - 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, - 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, - 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, - 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, - 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, - 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, - 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, - 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, - 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, - 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, - 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, - 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, - 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, - 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, - 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, - 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, - 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, - 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 
0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, - 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, - 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, - 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, - 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, - 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, - 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, - 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, - 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, - 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, -}; - -alignas(16) const uint32_t lutDec0[256] = { - 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, - 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, - 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, - 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, - 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, - 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, - 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, - 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, - 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, - 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, - 0xf98a213e, 0x3d06dd96, 0xae053edd, 
0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, - 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, - 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, - 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, - 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, - 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, - 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, - 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, - 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, - 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, - 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, - 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, - 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, - 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, - 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, - 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, - 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, - 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, - 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, - 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, - 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 
0x8b493c28, 0x41950dff, - 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, -}; - -alignas(16) const uint32_t lutDec1[256] = { - 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, - 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, - 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, - 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, - 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, - 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, - 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, - 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, - 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, - 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, - 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, - 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, - 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, - 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, - 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, - 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, - 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, - 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, - 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 
0x30b2dcf3, 0x52860dec, 0xe3c177d0, - 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, - 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, - 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, - 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, - 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, - 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, - 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, - 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, - 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, - 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, - 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, - 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, - 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, -}; - -alignas(16) const uint32_t lutDec2[256] = { - 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, - 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, - 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, - 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, - 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, - 0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, - 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 
0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, - 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, - 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, - 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, - 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, - 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, - 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, - 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, - 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, - 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, - 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, - 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, - 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, - 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, - 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, - 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, - 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, - 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, - 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, - 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, - 0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 
0x65467f51, - 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, - 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, - 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, - 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, - 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, -}; - -alignas(16) const uint32_t lutDec3[256] = { - 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, - 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, - 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, - 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, - 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, - 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, - 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, - 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, - 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, - 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, - 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, - 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, - 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, - 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, - 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 
0x5a694b77, 0x1c161a12, - 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, - 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, - 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, - 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, - 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, - 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, - 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, - 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, - 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, - 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, - 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, - 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, - 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, - 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, - 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, - 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, - 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, -}; - -rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key) { - uint32_t s0, s1, s2, s3; - - s0 = rx_vec_i128_w(in); - s1 = rx_vec_i128_z(in); - s2 = rx_vec_i128_y(in); - s3 = rx_vec_i128_x(in); - - rx_vec_i128 out = rx_set_int_vec_i128( - (lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 
16) & 0xff] ^ lutEnc3[s1 >> 24]), - (lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]), - (lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]), - (lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24]) - ); - - return rx_xor_vec_i128(out, key); -} - -rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key) { - uint32_t s0, s1, s2, s3; - - s0 = rx_vec_i128_w(in); - s1 = rx_vec_i128_z(in); - s2 = rx_vec_i128_y(in); - s3 = rx_vec_i128_x(in); - - rx_vec_i128 out = rx_set_int_vec_i128( - (lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]), - (lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]), - (lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]), - (lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24]) - ); - - return rx_xor_vec_i128(out, key); -} diff --git a/external/src/randomx/src/soft_aes.h b/external/src/randomx/src/soft_aes.h deleted file mode 100644 index 254f8d6..0000000 --- a/external/src/randomx/src/soft_aes.h +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include "intrin_portable.h" - -rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key); - -rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key); - -template -inline rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) { - return soft ? soft_aesenc(in, key) : rx_aesenc_vec_i128(in, key); -} - -template -inline rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) { - return soft ? soft_aesdec(in, key) : rx_aesdec_vec_i128(in, key); -} \ No newline at end of file diff --git a/external/src/randomx/src/superscalar.cpp b/external/src/randomx/src/superscalar.cpp deleted file mode 100644 index 4e9fd78..0000000 --- a/external/src/randomx/src/superscalar.cpp +++ /dev/null @@ -1,903 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "configuration.h" -#include "program.hpp" -#include "blake2/endian.h" -#include -#include -#include -#include -#include -#include "superscalar.hpp" -#include "intrin_portable.h" -#include "reciprocal.h" -#include "common.hpp" - -namespace randomx { - - static bool isMultiplication(SuperscalarInstructionType type) { - return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; - } - - //uOPs (micro-ops) are represented only by the execution port they can go to - namespace ExecutionPort { - using type = int; - constexpr type Null = 0; - constexpr type P0 = 1; - constexpr type P1 = 2; - constexpr type P5 = 4; - constexpr type P01 = P0 | P1; - constexpr type P05 = P0 | P5; - constexpr type P015 = P0 | P1 | P5; - } - - //Macro-operation as output of the x86 decoder - //Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op - //Macro-op can consist of 1 or 2 uOPs. 
- class MacroOp { - public: - MacroOp(const char* name, int size) - : name_(name), size_(size), latency_(0), uop1_(ExecutionPort::Null), uop2_(ExecutionPort::Null) {} - MacroOp(const char* name, int size, int latency, ExecutionPort::type uop) - : name_(name), size_(size), latency_(latency), uop1_(uop), uop2_(ExecutionPort::Null) {} - MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2) - : name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {} - MacroOp(const MacroOp& parent, bool dependent) - : name_(parent.name_), size_(parent.size_), latency_(parent.latency_), uop1_(parent.uop1_), uop2_(parent.uop2_), dependent_(dependent) {} - const char* getName() const { - return name_; - } - int getSize() const { - return size_; - } - int getLatency() const { - return latency_; - } - ExecutionPort::type getUop1() const { - return uop1_; - } - ExecutionPort::type getUop2() const { - return uop2_; - } - bool isSimple() const { - return uop2_ == ExecutionPort::Null; - } - bool isEliminated() const { - return uop1_ == ExecutionPort::Null; - } - bool isDependent() const { - return dependent_; - } - static const MacroOp Add_rr; - static const MacroOp Add_ri; - static const MacroOp Lea_sib; - static const MacroOp Sub_rr; - static const MacroOp Imul_rr; - static const MacroOp Imul_r; - static const MacroOp Mul_r; - static const MacroOp Mov_rr; - static const MacroOp Mov_ri64; - static const MacroOp Xor_rr; - static const MacroOp Xor_ri; - static const MacroOp Ror_rcl; - static const MacroOp Ror_ri; - static const MacroOp TestJz_fused; - static const MacroOp Xor_self; - static const MacroOp Cmp_ri; - static const MacroOp Setcc_r; - private: - const char* name_; - int size_; - int latency_; - ExecutionPort::type uop1_; - ExecutionPort::type uop2_; - bool dependent_ = false; - }; - - //Size: 3 bytes - const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015); - const MacroOp MacroOp::Sub_rr = 
MacroOp("sub r,r", 3, 1, ExecutionPort::P015); - const MacroOp MacroOp::Xor_rr = MacroOp("xor r,r", 3, 1, ExecutionPort::P015); - const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5); - const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5); - const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3); - - //Size: 4 bytes - const MacroOp MacroOp::Lea_sib = MacroOp("lea r,r+r*s", 4, 1, ExecutionPort::P01); - const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1); - const MacroOp MacroOp::Ror_ri = MacroOp("ror r,i", 4, 1, ExecutionPort::P05); - - //Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes) - const MacroOp MacroOp::Add_ri = MacroOp("add r,i", 7, 1, ExecutionPort::P015); - const MacroOp MacroOp::Xor_ri = MacroOp("xor r,i", 7, 1, ExecutionPort::P015); - - //Size: 10 bytes - const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015); - - //Unused: - const MacroOp MacroOp::Ror_rcl = MacroOp("ror r,cl", 3, 1, ExecutionPort::P0, ExecutionPort::P5); - const MacroOp MacroOp::Xor_self = MacroOp("xor rcx,rcx", 3); - const MacroOp MacroOp::Cmp_ri = MacroOp("cmp r,i", 7, 1, ExecutionPort::P015); - const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05); - const MacroOp MacroOp::TestJz_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5); - - const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr }; - const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr }; - const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) }; - - class SuperscalarInstructionInfo { - public: - const char* getName() const { - return name_; - } - int getSize() const { - return ops_.size(); - } - bool isSimple() const { - return getSize() == 1; - } - int getLatency() const { - return latency_; - } - const MacroOp& getOp(int index) const { - 
return ops_[index]; - } - SuperscalarInstructionType getType() const { - return type_; - } - int getResultOp() const { - return resultOp_; - } - int getDstOp() const { - return dstOp_; - } - int getSrcOp() const { - return srcOp_; - } - static const SuperscalarInstructionInfo ISUB_R; - static const SuperscalarInstructionInfo IXOR_R; - static const SuperscalarInstructionInfo IADD_RS; - static const SuperscalarInstructionInfo IMUL_R; - static const SuperscalarInstructionInfo IROR_C; - static const SuperscalarInstructionInfo IADD_C7; - static const SuperscalarInstructionInfo IXOR_C7; - static const SuperscalarInstructionInfo IADD_C8; - static const SuperscalarInstructionInfo IXOR_C8; - static const SuperscalarInstructionInfo IADD_C9; - static const SuperscalarInstructionInfo IXOR_C9; - static const SuperscalarInstructionInfo IMULH_R; - static const SuperscalarInstructionInfo ISMULH_R; - static const SuperscalarInstructionInfo IMUL_RCP; - static const SuperscalarInstructionInfo NOP; - private: - const char* name_; - SuperscalarInstructionType type_; - std::vector ops_; - int latency_; - int resultOp_ = 0; - int dstOp_ = 0; - int srcOp_; - - SuperscalarInstructionInfo(const char* name) - : name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {} - SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp) - : name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) { - ops_.push_back(MacroOp(op)); - } - template - SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) - : name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { - for (unsigned i = 0; i < N; ++i) { - ops_.push_back(MacroOp(arr[i])); - latency_ += ops_.back().getLatency(); - } - static_assert(N > 1, "Invalid array size"); - } - }; - - const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = 
SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); - - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); - - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); - const SuperscalarInstructionInfo 
SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); - const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); - - const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP"); - - //these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions. - //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate). - //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction. - const int buffer0[] = { 4, 8, 4 }; - const int buffer1[] = { 7, 3, 3, 3 }; - const int buffer2[] = { 3, 7, 3, 3 }; - const int buffer3[] = { 4, 9, 3 }; - const int buffer4[] = { 4, 4, 4, 4 }; - const int buffer5[] = { 3, 3, 10 }; - - class DecoderBuffer { - public: - static const DecoderBuffer Default; - template - DecoderBuffer(const char* name, int index, const int(&arr)[N]) - : name_(name), index_(index), counts_(arr), opsCount_(N) {} - const int* getCounts() const { - return counts_; - } - int getSize() const { - return opsCount_; - } - int getIndex() const { - return index_; - } - const char* getName() const { - return name_; - } - const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const { - //If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10 - //because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs. - //Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops. 
- if (instrType == SuperscalarInstructionType::IMULH_R || instrType == SuperscalarInstructionType::ISMULH_R) - return &decodeBuffer3310; - - //To make sure that the multiplication port is saturated, a 4-4-4-4 configuration is generated if the number of multiplications - //is lower than the number of cycles. - if (mulCount < cycle + 1) - return &decodeBuffer4444; - - //If the current RandomX instruction is "IMUL_RCP", the next buffer must begin with a 4-byte slot for multiplication. - if(instrType == SuperscalarInstructionType::IMUL_RCP) - return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493; - - //Default: select a random fetch configuration. - return fetchNextDefault(gen); - } - private: - const char* name_; - int index_; - const int* counts_; - int opsCount_; - DecoderBuffer() : index_(-1) {} - static const DecoderBuffer decodeBuffer484; - static const DecoderBuffer decodeBuffer7333; - static const DecoderBuffer decodeBuffer3733; - static const DecoderBuffer decodeBuffer493; - static const DecoderBuffer decodeBuffer4444; - static const DecoderBuffer decodeBuffer3310; - static const DecoderBuffer* decodeBuffers[4]; - const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const { - return decodeBuffers[gen.getByte() & 3]; - } - }; - - const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); - const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); - const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); - const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3); - const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4); - const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5); - - const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = { - &DecoderBuffer::decodeBuffer484, - &DecoderBuffer::decodeBuffer7333, - &DecoderBuffer::decodeBuffer3733, - 
&DecoderBuffer::decodeBuffer493, - }; - - const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); - - const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R }; - const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R }; - const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS }; - const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 }; - const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 }; - const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 }; - const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP; - - static bool selectRegister(std::vector& availableRegisters, Blake2Generator& gen, int& reg) { - int index; - if (availableRegisters.size() == 0) - return false; - - if (availableRegisters.size() > 1) { - index = gen.getUInt32() % availableRegisters.size(); - } - else { - index = 0; - } - reg = availableRegisters[index]; - return true; - } - - class RegisterInfo { - public: - RegisterInfo() : latency(0), lastOpGroup(SuperscalarInstructionType::INVALID), lastOpPar(-1), value(0) {} - int latency; - SuperscalarInstructionType lastOpGroup; - int lastOpPar; - int value; - }; - - //"SuperscalarInstruction" consists of one or more macro-ops - class SuperscalarInstruction { - public: - void toInstr(Instruction& instr) { //translate to a RandomX instruction format - instr.opcode = (int)getType(); - instr.dst = dst_; - instr.src = src_ >= 0 ? 
src_ : dst_; - instr.setMod(mod_); - instr.setImm32(imm32_); - } - - void createForSlot(Blake2Generator& gen, int slotSize, int fetchType, bool isLast, bool isFirst) { - switch (slotSize) - { - case 3: - //if this is the last slot, we can also select "IMULH" instructions - if (isLast) { - create(slot_3L[gen.getByte() & 3], gen); - } - else { - create(slot_3[gen.getByte() & 1], gen); - } - break; - case 4: - //if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions - if (fetchType == 4 && !isLast) { - create(&SuperscalarInstructionInfo::IMUL_R, gen); - } - else { - create(slot_4[gen.getByte() & 1], gen); - } - break; - case 7: - create(slot_7[gen.getByte() & 1], gen); - break; - case 8: - create(slot_8[gen.getByte() & 1], gen); - break; - case 9: - create(slot_9[gen.getByte() & 1], gen); - break; - case 10: - create(slot_10, gen); - break; - default: - UNREACHABLE; - } - } - - void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) { - info_ = info; - reset(); - switch (info->getType()) - { - case SuperscalarInstructionType::ISUB_R: { - mod_ = 0; - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::IADD_RS; - groupParIsSource_ = true; - } break; - - case SuperscalarInstructionType::IXOR_R: { - mod_ = 0; - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::IXOR_R; - groupParIsSource_ = true; - } break; - - case SuperscalarInstructionType::IADD_RS: { - mod_ = gen.getByte(); - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::IADD_RS; - groupParIsSource_ = true; - } break; - - case SuperscalarInstructionType::IMUL_R: { - mod_ = 0; - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::IMUL_R; - groupParIsSource_ = true; - } break; - - case SuperscalarInstructionType::IROR_C: { - mod_ = 0; - do { - imm32_ = gen.getByte() & 63; - } while (imm32_ == 0); - opGroup_ = SuperscalarInstructionType::IROR_C; - opGroupPar_ = -1; - } break; - - case SuperscalarInstructionType::IADD_C7: - case SuperscalarInstructionType::IADD_C8: - 
case SuperscalarInstructionType::IADD_C9: { - mod_ = 0; - imm32_ = gen.getUInt32(); - opGroup_ = SuperscalarInstructionType::IADD_C7; - opGroupPar_ = -1; - } break; - - case SuperscalarInstructionType::IXOR_C7: - case SuperscalarInstructionType::IXOR_C8: - case SuperscalarInstructionType::IXOR_C9: { - mod_ = 0; - imm32_ = gen.getUInt32(); - opGroup_ = SuperscalarInstructionType::IXOR_C7; - opGroupPar_ = -1; - } break; - - case SuperscalarInstructionType::IMULH_R: { - canReuse_ = true; - mod_ = 0; - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::IMULH_R; - opGroupPar_ = gen.getUInt32(); - } break; - - case SuperscalarInstructionType::ISMULH_R: { - canReuse_ = true; - mod_ = 0; - imm32_ = 0; - opGroup_ = SuperscalarInstructionType::ISMULH_R; - opGroupPar_ = gen.getUInt32(); - } break; - - case SuperscalarInstructionType::IMUL_RCP: { - mod_ = 0; - do { - imm32_ = gen.getUInt32(); - } while (isZeroOrPowerOf2(imm32_)); - opGroup_ = SuperscalarInstructionType::IMUL_RCP; - opGroupPar_ = -1; - } break; - - default: - break; - } - } - - bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (®isters)[8], Blake2Generator& gen) { - /*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R) - std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/ - std::vector availableRegisters; - //Conditions for the destination register: - // * value must be ready at the required cycle - // * cannot be the same as the source register unless the instruction allows it - // - this avoids optimizable instructions such as "xor r, r" or "sub r, r" - // * register cannot be multiplied twice in a row unless allowChainedMul is true - // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication - // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator) - // * either the last instruction 
applied to the register or its source must be different than this instruction - // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" - // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) - for (unsigned i = 0; i < 8; ++i) { - if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement)) - availableRegisters.push_back(i); - } - return selectRegister(availableRegisters, gen, dst_); - } - - bool selectSource(int cycle, RegisterInfo(®isters)[8], Blake2Generator& gen) { - std::vector availableRegisters; - //all registers that are ready at the cycle - for (unsigned i = 0; i < 8; ++i) { - if (registers[i].latency <= cycle) - availableRegisters.push_back(i); - } - //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination - if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) { - if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) { - opGroupPar_ = src_ = RegisterNeedsDisplacement; - return true; - } - } - if (selectRegister(availableRegisters, gen, src_)) { - if (groupParIsSource_) - opGroupPar_ = src_; - return true; - } - return false; - } - - SuperscalarInstructionType getType() { - return info_->getType(); - } - int getSource() { - return src_; - } - int getDestination() { - return dst_; - } - SuperscalarInstructionType getGroup() { - return opGroup_; - } - int getGroupPar() { - return opGroupPar_; - } - - const SuperscalarInstructionInfo& 
getInfo() const { - return *info_; - } - - static const SuperscalarInstruction Null; - - private: - const SuperscalarInstructionInfo* info_; - int src_ = -1; - int dst_ = -1; - int mod_; - uint32_t imm32_; - SuperscalarInstructionType opGroup_; - int opGroupPar_; - bool canReuse_ = false; - bool groupParIsSource_ = false; - - void reset() { - src_ = dst_ = -1; - canReuse_ = groupParIsSource_ = false; - } - - SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) { - } - }; - - const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP); - - constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4; - constexpr int LOOK_FORWARD_CYCLES = 4; - constexpr int MAX_THROWAWAY_COUNT = 256; - - template - static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) { - //The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload - //port P1 (multiplication) by instructions that can go to any port. 
- for (; cycle < CYCLE_MAP_SIZE; ++cycle) { - if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) { - if (commit) { - if (trace) std::cout << "; P5 at cycle " << cycle << std::endl; - portBusy[cycle][2] = uop; - } - return cycle; - } - if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) { - if (commit) { - if (trace) std::cout << "; P0 at cycle " << cycle << std::endl; - portBusy[cycle][0] = uop; - } - return cycle; - } - if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) { - if (commit) { - if (trace) std::cout << "; P1 at cycle " << cycle << std::endl; - portBusy[cycle][1] = uop; - } - return cycle; - } - } - return -1; - } - - template - static int scheduleMop(const MacroOp& mop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle, int depCycle) { - //if this macro-op depends on the previous one, increase the starting cycle if needed - //this handles an explicit dependency chain in IMUL_RCP - if (mop.isDependent()) { - cycle = std::max(cycle, depCycle); - } - //move instructions are eliminated and don't need an execution unit - if (mop.isEliminated()) { - if (commit) - if (trace) std::cout << "; (eliminated)" << std::endl; - return cycle; - } - else if (mop.isSimple()) { - //this macro-op has only one uOP - return scheduleUop(mop.getUop1(), portBusy, cycle); - } - else { - //macro-ops with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle - for (; cycle < CYCLE_MAP_SIZE; ++cycle) { - - int cycle1 = scheduleUop(mop.getUop1(), portBusy, cycle); - int cycle2 = scheduleUop(mop.getUop2(), portBusy, cycle); - - if (cycle1 >= 0 && cycle1 == cycle2) { - if (commit) { - scheduleUop(mop.getUop1(), portBusy, cycle1); - scheduleUop(mop.getUop2(), portBusy, cycle2); - } - return cycle1; - } - } - } - - return -1; - } - - void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) { - - ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3]; - memset(portBusy, 0, sizeof(portBusy)); - RegisterInfo 
registers[8]; - - const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default; - SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null; - int macroOpIndex = 0; - int codeSize = 0; - int macroOpCount = 0; - int cycle = 0; - int depCycle = 0; - int retireCycle = 0; - bool portsSaturated = false; - int programSize = 0; - int mulCount = 0; - int decodeCycle; - int throwAwayCount = 0; - - //decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated. - //Each decode cycle decodes 16 bytes of x86 code. - //Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always - //saturated first. The cycle limit is present only to guarantee loop termination. - //Program size is limited to SuperscalarMaxSize instructions. - for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < SuperscalarMaxSize; ++decodeCycle) { - - //select a decode configuration - decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen); - if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl; - - int bufferIndex = 0; - - //fill all instruction slots in the current decode buffer - while (bufferIndex < decodeBuffer->getSize()) { - int topCycle = cycle; - - //if we have issued all macro-ops for the current RandomX instruction, create a new instruction - if (macroOpIndex >= currentInstruction.getInfo().getSize()) { - if (portsSaturated || programSize >= SuperscalarMaxSize) - break; - //select an instruction so that the first macro-op fits into the current slot - currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0); - macroOpIndex = 0; - if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl; - } - const MacroOp& mop = 
currentInstruction.getInfo().getOp(macroOpIndex); - if (trace) std::cout << mop.getName() << " "; - - //calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution - int scheduleCycle = scheduleMop(mop, portBusy, cycle, depCycle); - if (scheduleCycle < 0) { - if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; - //__debugbreak(); - portsSaturated = true; - break; - } - - //find a source register (if applicable) that will be ready when this instruction executes - if (macroOpIndex == currentInstruction.getInfo().getSrcOp()) { - int forward; - //if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward - for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) { - if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl; - ++scheduleCycle; - ++cycle; - } - //if no register was found, throw the instruction away and try another one - if (forward == LOOK_FORWARD_CYCLES) { - if (throwAwayCount < MAX_THROWAWAY_COUNT) { - throwAwayCount++; - macroOpIndex = currentInstruction.getInfo().getSize(); - if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; - //cycle = topCycle; - continue; - } - //abort this decode buffer - if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; - currentInstruction = SuperscalarInstruction::Null; - break; - } - if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; - } - //find a destination register that will be ready when this instruction executes - if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { - int forward; - for (forward = 0; forward < LOOK_FORWARD_CYCLES && 
!currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) { - if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl; - ++scheduleCycle; - ++cycle; - } - if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away - if (throwAwayCount < MAX_THROWAWAY_COUNT) { - throwAwayCount++; - macroOpIndex = currentInstruction.getInfo().getSize(); - if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; - //cycle = topCycle; - continue; - } - //abort this decode buffer - if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; - currentInstruction = SuperscalarInstruction::Null; - break; - } - if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; - } - throwAwayCount = 0; - - //recalculate when the instruction can be scheduled for execution based on operand availability - scheduleCycle = scheduleMop(mop, portBusy, scheduleCycle, scheduleCycle); - - if (scheduleCycle < 0) { - if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << scheduleCycle << ")" << std::endl; - portsSaturated = true; - break; - } - - //calculate when the result will be ready - depCycle = scheduleCycle + mop.getLatency(); - - //if this instruction writes the result, modify register information - // RegisterInfo.latency - which cycle the register will be ready - // RegisterInfo.lastOpGroup - the last operation that was applied to the register - // RegisterInfo.lastOpPar - the last operation source value (-1 = constant, 0-7 = register) - if (macroOpIndex == currentInstruction.getInfo().getResultOp()) { - int dst = currentInstruction.getDestination(); - RegisterInfo& ri = registers[dst]; - retireCycle = depCycle; - ri.latency = retireCycle; - ri.lastOpGroup = currentInstruction.getGroup(); - ri.lastOpPar = 
currentInstruction.getGroupPar(); - if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl; - } - codeSize += mop.getSize(); - bufferIndex++; - macroOpIndex++; - macroOpCount++; - - //terminating condition - if (scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY) { - portsSaturated = true; - } - cycle = topCycle; - - //when all macro-ops of the current instruction have been issued, add the instruction into the program - if (macroOpIndex >= currentInstruction.getInfo().getSize()) { - currentInstruction.toInstr(prog(programSize++)); - mulCount += isMultiplication(currentInstruction.getType()); - } - } - ++cycle; - } - - double ipc = (macroOpCount / (double)retireCycle); - - memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies)); - - //Calculate ASIC latency: - //Assumes 1 cycle latency for all operations and unlimited parallelization. - for (int i = 0; i < programSize; ++i) { - Instruction& instr = prog(i); - int latDst = prog.asicLatencies[instr.dst] + 1; - int latSrc = instr.dst != instr.src ? 
prog.asicLatencies[instr.src] + 1 : 0; - prog.asicLatencies[instr.dst] = std::max(latDst, latSrc); - } - - //address register is the register with the highest ASIC latency - int asicLatencyMax = 0; - int addressReg = 0; - for (int i = 0; i < 8; ++i) { - if (prog.asicLatencies[i] > asicLatencyMax) { - asicLatencyMax = prog.asicLatencies[i]; - addressReg = i; - } - prog.cpuLatencies[i] = registers[i].latency; - } - - prog.setSize(programSize); - prog.setAddressRegister(addressReg); - - prog.cpuLatency = retireCycle; - prog.asicLatency = asicLatencyMax; - prog.codeSize = codeSize; - prog.macroOps = macroOpCount; - prog.decodeCycles = decodeCycle; - prog.ipc = ipc; - prog.mulCount = mulCount; - - - /*if(INFO) std::cout << "; ALU port utilization:" << std::endl; - if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; - - int portCycles = 0; - for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { - std::cout << "; " << std::setw(3) << i << " "; - for (int j = 0; j < 3; ++j) { - std::cout << (portBusy[i][j] ? 
'*' : '_'); - portCycles += !!portBusy[i][j]; - } - std::cout << std::endl; - }*/ - } - - void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals) { - for (unsigned j = 0; j < prog.getSize(); ++j) { - Instruction& instr = prog(j); - switch ((SuperscalarInstructionType)instr.opcode) - { - case SuperscalarInstructionType::ISUB_R: - r[instr.dst] -= r[instr.src]; - break; - case SuperscalarInstructionType::IXOR_R: - r[instr.dst] ^= r[instr.src]; - break; - case SuperscalarInstructionType::IADD_RS: - r[instr.dst] += r[instr.src] << instr.getModShift(); - break; - case SuperscalarInstructionType::IMUL_R: - r[instr.dst] *= r[instr.src]; - break; - case SuperscalarInstructionType::IROR_C: - r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); - break; - case SuperscalarInstructionType::IADD_C7: - case SuperscalarInstructionType::IADD_C8: - case SuperscalarInstructionType::IADD_C9: - r[instr.dst] += signExtend2sCompl(instr.getImm32()); - break; - case SuperscalarInstructionType::IXOR_C7: - case SuperscalarInstructionType::IXOR_C8: - case SuperscalarInstructionType::IXOR_C9: - r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); - break; - case SuperscalarInstructionType::IMULH_R: - r[instr.dst] = mulh(r[instr.dst], r[instr.src]); - break; - case SuperscalarInstructionType::ISMULH_R: - r[instr.dst] = smulh(r[instr.dst], r[instr.src]); - break; - case SuperscalarInstructionType::IMUL_RCP: - if (reciprocals != nullptr) - r[instr.dst] *= (*reciprocals)[instr.getImm32()]; - else - r[instr.dst] *= randomx_reciprocal(instr.getImm32()); - break; - default: - UNREACHABLE; - } - } - } -} diff --git a/external/src/randomx/src/superscalar.hpp b/external/src/randomx/src/superscalar.hpp deleted file mode 100644 index bc101c4..0000000 --- a/external/src/randomx/src/superscalar.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include "superscalar_program.hpp" -#include "blake2_generator.hpp" - -namespace randomx { - // Intel Ivy Bridge reference - enum class SuperscalarInstructionType { //uOPs (decode) execution ports latency code size - ISUB_R = 0, //1 p015 1 3 (sub) - IXOR_R = 1, //1 p015 1 3 (xor) - IADD_RS = 2, //1 p01 1 4 (lea) - IMUL_R = 3, //1 p1 3 4 (imul) - IROR_C = 4, //1 p05 1 4 (ror) - IADD_C7 = 5, //1 p015 1 7 (add) - IXOR_C7 = 6, //1 p015 1 7 (xor) - IADD_C8 = 7, //1+0 p015 1 7+1 (add+nop) - IXOR_C8 = 8, //1+0 p015 1 7+1 (xor+nop) - IADD_C9 = 9, //1+0 p015 1 7+2 (add+nop) - IXOR_C9 = 10, //1+0 p015 1 7+2 (xor+nop) - IMULH_R = 11, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) - ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) - IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul) - - COUNT = 14, - INVALID = -1 - }; - - void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); - void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals = nullptr); -} \ No newline at end of file diff --git a/external/src/randomx/src/superscalar_program.hpp b/external/src/randomx/src/superscalar_program.hpp deleted file mode 100644 index 7bcd484..0000000 --- a/external/src/randomx/src/superscalar_program.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include "instruction.hpp" -#include "common.hpp" - -namespace randomx { - - class SuperscalarProgram { - public: - Instruction& operator()(int pc) { - return programBuffer[pc]; - } - friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) { - p.print(os); - return os; - } - uint32_t getSize() { - return size; - } - void setSize(uint32_t val) { - size = val; - } - int getAddressRegister() { - return addrReg; - } - void setAddressRegister(int val) { - addrReg = val; - } - - Instruction programBuffer[SuperscalarMaxSize]; - uint32_t size -#ifndef NDEBUG - = 0 -#endif - ; - int addrReg; - double ipc; - int codeSize; - int macroOps; - int decodeCycles; - int cpuLatency; - int asicLatency; - int mulCount; - int cpuLatencies[8]; - int asicLatencies[8]; - private: - void print(std::ostream& os) const { - for (unsigned i = 0; i < size; ++i) { - auto instr = programBuffer[i]; - os << instr; - } - } - }; - -} \ No newline at end of file diff --git 
a/external/src/randomx/src/tests/affinity.cpp b/external/src/randomx/src/tests/affinity.cpp deleted file mode 100644 index b090d47..0000000 --- a/external/src/randomx/src/tests/affinity.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* -Copyright (c) 2019, jtgrassie -Copyright (c) 2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include - -#if defined(_WIN32) || defined(__CYGWIN__) - #include -#else - #ifdef __APPLE__ - #include - #include - #endif - #include -#endif -#include "affinity.hpp" - -int -set_thread_affinity(const unsigned &cpuid) -{ - std::thread::native_handle_type thread; -#if defined(_WIN32) || defined(__CYGWIN__) - thread = reinterpret_cast(GetCurrentThread()); -#else - thread = static_cast(pthread_self()); -#endif - return set_thread_affinity(thread, cpuid); -} - -int -set_thread_affinity(std::thread::native_handle_type thread, - const unsigned &cpuid) -{ - int rc = -1; -#ifdef __APPLE__ - thread_port_t mach_thread; - thread_affinity_policy_data_t policy = { static_cast(cpuid) }; - mach_thread = pthread_mach_thread_np(thread); - rc = thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, - (thread_policy_t)&policy, 1); -#elif defined(_WIN32) || defined(__CYGWIN__) - rc = SetThreadAffinityMask(reinterpret_cast(thread), 1ULL << cpuid) == 0 ? -2 : 0; -#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__) - cpu_set_t cs; - CPU_ZERO(&cs); - CPU_SET(cpuid, &cs); - rc = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cs); -#endif - return rc; -} - -unsigned -cpuid_from_mask(uint64_t mask, const unsigned &thread_index) -{ - static unsigned lookup[64]; - static bool init = false; - if (init) - return lookup[thread_index]; - unsigned count_found = 0; - for (unsigned i=0; i<64; i++) - { - if (1ULL & mask) - { - lookup[count_found] = i; - count_found++; - } - mask >>= 1; - } - init = true; - return lookup[thread_index]; -} - -std::string -mask_to_string(uint64_t mask) -{ - std::ostringstream ss; - unsigned len = 0; - unsigned v = 0; - unsigned i = 64; - while (i--) - { - v = mask >> i; - if (1ULL & v) - { - if (len == 0) len = i + 1; - ss << '1'; - } - else - if (len > 0) ss << '0'; - } - return ss.str(); -} diff --git a/external/src/randomx/src/tests/affinity.hpp b/external/src/randomx/src/tests/affinity.hpp deleted file mode 
100644 index db9e9a9..0000000 --- a/external/src/randomx/src/tests/affinity.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2019, jtgrassie - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include - -int set_thread_affinity(const unsigned &cpuid); -int set_thread_affinity(std::thread::native_handle_type thread, - const unsigned &cpuid); -unsigned cpuid_from_mask(uint64_t mask, const unsigned &thread_index); -std::string mask_to_string(uint64_t mask); diff --git a/external/src/randomx/src/tests/api-example1.c b/external/src/randomx/src/tests/api-example1.c deleted file mode 100644 index e5f8526..0000000 --- a/external/src/randomx/src/tests/api-example1.c +++ /dev/null @@ -1,25 +0,0 @@ -#include "../randomx.h" -#include - -int main() { - const char myKey[] = "RandomX example key"; - const char myInput[] = "RandomX example input"; - char hash[RANDOMX_HASH_SIZE]; - - randomx_flags flags = randomx_get_flags(); - randomx_cache *myCache = randomx_alloc_cache(flags); - randomx_init_cache(myCache, &myKey, sizeof myKey); - randomx_vm *myMachine = randomx_create_vm(flags, myCache, NULL); - - randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash); - - randomx_destroy_vm(myMachine); - randomx_release_cache(myCache); - - for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i) - printf("%02x", hash[i] & 0xff); - - printf("\n"); - - return 0; -} diff --git a/external/src/randomx/src/tests/api-example2.cpp b/external/src/randomx/src/tests/api-example2.cpp deleted file mode 100644 index 610aaa8..0000000 --- a/external/src/randomx/src/tests/api-example2.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "../randomx.h" -#include -#include -#include - -int main() { - const char myKey[] = "RandomX example key"; - const char myInput[] = "RandomX example input"; - char hash[RANDOMX_HASH_SIZE]; - - randomx_flags flags = randomx_get_flags(); - flags |= RANDOMX_FLAG_LARGE_PAGES; - flags |= RANDOMX_FLAG_FULL_MEM; - randomx_cache *myCache = randomx_alloc_cache(flags); - if (myCache == nullptr) { - std::cout << "Cache allocation failed" << std::endl; - return 1; - } - randomx_init_cache(myCache, myKey, sizeof myKey); - - randomx_dataset 
*myDataset = randomx_alloc_dataset(flags); - if (myDataset == nullptr) { - std::cout << "Dataset allocation failed" << std::endl; - return 1; - } - - auto datasetItemCount = randomx_dataset_item_count(); - std::thread t1(&randomx_init_dataset, myDataset, myCache, 0, datasetItemCount / 2); - std::thread t2(&randomx_init_dataset, myDataset, myCache, datasetItemCount / 2, datasetItemCount - datasetItemCount / 2); - t1.join(); - t2.join(); - randomx_release_cache(myCache); - - randomx_vm *myMachine = randomx_create_vm(flags, nullptr, myDataset); - if (myMachine == nullptr) { - std::cout << "Failed to create a virtual machine" << std::endl; - return 1; - } - - randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash); - - randomx_destroy_vm(myMachine); - randomx_release_dataset(myDataset); - - for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i) - std::cout << std::hex << std::setw(2) << std::setfill('0') << ((int)hash[i] & 0xff); - - std::cout << std::endl; - - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/benchmark.cpp b/external/src/randomx/src/tests/benchmark.cpp deleted file mode 100644 index 09a0bc3..0000000 --- a/external/src/randomx/src/tests/benchmark.cpp +++ /dev/null @@ -1,407 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "stopwatch.hpp" -#include "utility.hpp" -#include "../randomx.h" -#include "../dataset.hpp" -#include "../blake2/endian.h" -#include "../common.hpp" -#include "../jit_compiler.hpp" -#ifdef _WIN32 -#include -#include -#endif -#include "affinity.hpp" - -const uint8_t blockTemplate_[] = { - 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, - 0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e, - 0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca, - 0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09 -}; - -class AtomicHash { -public: - AtomicHash() { - for (int i = 0; i < 4; ++i) - hash[i].store(0); - } - void xorWith(uint64_t update[4]) { - for (int i = 0; i < 4; ++i) - hash[i].fetch_xor(update[i]); - } - void print(std::ostream& os) { - for (int i = 0; i < 4; ++i) - print(hash[i], os); - os << std::endl; - } -private: - static void print(std::atomic& hash, std::ostream& os) { 
- auto h = hash.load(); - outputHex(std::cout, (char*)&h, sizeof(h)); - } - std::atomic hash[4]; -}; - -void printUsage(const char* executable) { - std::cout << "Usage: " << executable << " [OPTIONS]" << std::endl; - std::cout << "Supported options:" << std::endl; - std::cout << " --help shows this message" << std::endl; - std::cout << " --mine mining mode: 2080 MiB" << std::endl; - std::cout << " --verify verification mode: 256 MiB" << std::endl; - std::cout << " --jit JIT compiled mode (default: interpreter)" << std::endl; - std::cout << " --secure W^X policy for JIT pages (default: off)" << std::endl; - std::cout << " --largePages use large pages (default: small pages)" << std::endl; - std::cout << " --softAes use software AES (default: hardware AES)" << std::endl; - std::cout << " --threads T use T threads (default: 1)" << std::endl; - std::cout << " --affinity A thread affinity bitmask (default: 0)" << std::endl; - std::cout << " --init Q initialize dataset with Q threads (default: 1)" << std::endl; - std::cout << " --nonces N run N nonces (default: 1000)" << std::endl; - std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl; - std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl; - std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl; - std::cout << " --auto select the best options for the current CPU" << std::endl; - std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl; -} - -struct MemoryException : public std::exception { -}; -struct CacheAllocException : public MemoryException { - const char * what() const throw () { - return "Cache allocation failed"; - } -}; -struct DatasetAllocException : public MemoryException { - const char * what() const throw () { - return "Dataset allocation failed"; - } -}; - -using MineFunc = void(randomx_vm * vm, std::atomic & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid); - -template -void 
mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) { - if (cpuid >= 0) { - int rc = set_thread_affinity(cpuid); - if (rc) { - std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl; - } - } - uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)]; - uint8_t blockTemplate[sizeof(blockTemplate_)]; - memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); - void* noncePtr = blockTemplate + 39; - auto nonce = atomicNonce.fetch_add(1); - - if (batch) { - store32(noncePtr, nonce); - randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate)); - } - - while (nonce < noncesCount) { - if (batch) { - nonce = atomicNonce.fetch_add(1); - } - store32(noncePtr, nonce); - (batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash); - result.xorWith(hash); - if (!batch) { - nonce = atomicNonce.fetch_add(1); - } - } -} - -int main(int argc, char** argv) { - bool softAes, miningMode, verificationMode, help, largePages, jit, secure; - bool ssse3, avx2, autoFlags, noBatch; - int noncesCount, threadCount, initThreadCount; - uint64_t threadAffinity; - int32_t seedValue; - char seed[4]; - - readOption("--softAes", argc, argv, softAes); - readOption("--mine", argc, argv, miningMode); - readOption("--verify", argc, argv, verificationMode); - readIntOption("--threads", argc, argv, threadCount, 1); - readUInt64Option("--affinity", argc, argv, threadAffinity, 0); - readIntOption("--nonces", argc, argv, noncesCount, 1000); - readIntOption("--init", argc, argv, initThreadCount, 1); - readIntOption("--seed", argc, argv, seedValue, 0); - readOption("--largePages", argc, argv, largePages); - if (!largePages) { - readOption("--largepages", argc, argv, largePages); - } - readOption("--jit", argc, argv, jit); - readOption("--help", argc, argv, help); - readOption("--secure", argc, argv, secure); - readOption("--ssse3", 
argc, argv, ssse3); - readOption("--avx2", argc, argv, avx2); - readOption("--auto", argc, argv, autoFlags); - readOption("--noBatch", argc, argv, noBatch); - - store32(&seed, seedValue); - - std::cout << "RandomX benchmark v1.1.8" << std::endl; - - if (help) { - printUsage(argv[0]); - return 0; - } - - if (!miningMode && !verificationMode) { - std::cout << "Please select either the fast mode (--mine) or the slow mode (--verify)" << std::endl; - std::cout << "Run '" << argv[0] << " --help' to see all supported options" << std::endl; - return 0; - } - - std::atomic atomicNonce(0); - AtomicHash result; - std::vector vms; - std::vector threads; - randomx_dataset* dataset; - randomx_cache* cache; - randomx_flags flags; - - if (autoFlags) { - initThreadCount = std::thread::hardware_concurrency(); - flags = randomx_get_flags(); - } - else { - flags = RANDOMX_FLAG_DEFAULT; - if (ssse3) { - flags |= RANDOMX_FLAG_ARGON2_SSSE3; - } - if (avx2) { - flags |= RANDOMX_FLAG_ARGON2_AVX2; - } - if (!softAes) { - flags |= RANDOMX_FLAG_HARD_AES; - } - if (jit) { - flags |= RANDOMX_FLAG_JIT; -#ifdef RANDOMX_FORCE_SECURE - flags |= RANDOMX_FLAG_SECURE; -#endif - } - } - - if (largePages) { - flags |= RANDOMX_FLAG_LARGE_PAGES; - } - if (miningMode) { - flags |= RANDOMX_FLAG_FULL_MEM; - } -#ifndef RANDOMX_FORCE_SECURE - if (secure) { - flags |= RANDOMX_FLAG_SECURE; - } -#endif - - if (flags & RANDOMX_FLAG_ARGON2_AVX2) { - std::cout << " - Argon2 implementation: AVX2" << std::endl; - } - else if (flags & RANDOMX_FLAG_ARGON2_SSSE3) { - std::cout << " - Argon2 implementation: SSSE3" << std::endl; - } - else { - std::cout << " - Argon2 implementation: reference" << std::endl; - } - - if (flags & RANDOMX_FLAG_FULL_MEM) { - std::cout << " - full memory mode (2080 MiB)" << std::endl; - } - else { - std::cout << " - light memory mode (256 MiB)" << std::endl; - } - - if (flags & RANDOMX_FLAG_JIT) { - std::cout << " - JIT compiled mode "; - if (flags & RANDOMX_FLAG_SECURE) { - std::cout << 
"(secure)"; - } - std::cout << std::endl; - } - else { - std::cout << " - interpreted mode" << std::endl; - } - - if (flags & RANDOMX_FLAG_HARD_AES) { - std::cout << " - hardware AES mode" << std::endl; - } - else { - std::cout << " - software AES mode" << std::endl; - } - - if (flags & RANDOMX_FLAG_LARGE_PAGES) { - std::cout << " - large pages mode" << std::endl; - } - else { - std::cout << " - small pages mode" << std::endl; - } - - if (threadAffinity) { - std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl; - } - - MineFunc* func; - - if (noBatch) { - func = &mine; - } - else { - func = &mine; - std::cout << " - batch mode" << std::endl; - } - - std::cout << "Initializing"; - if (miningMode) - std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")"); - std::cout << " ..." << std::endl; - - try { - if (nullptr == randomx::selectArgonImpl(flags)) { - throw std::runtime_error("Unsupported Argon2 implementation"); - } - if ((flags & RANDOMX_FLAG_JIT) && !RANDOMX_HAVE_COMPILER) { - throw std::runtime_error("JIT compilation is not supported on this platform. Try without --jit"); - } - if (!(flags & RANDOMX_FLAG_JIT) && RANDOMX_HAVE_COMPILER) { - std::cout << "WARNING: You are using the interpreter mode. Use --jit for optimal performance." << std::endl; - } - - Stopwatch sw(true); - cache = randomx_alloc_cache(flags); - if (cache == nullptr) { - throw CacheAllocException(); - } - randomx_init_cache(cache, &seed, sizeof(seed)); - if (miningMode) { - dataset = randomx_alloc_dataset(flags); - if (dataset == nullptr) { - throw DatasetAllocException(); - } - uint32_t datasetItemCount = randomx_dataset_item_count(); - if (initThreadCount > 1) { - auto perThread = datasetItemCount / initThreadCount; - auto remainder = datasetItemCount % initThreadCount; - uint32_t startItem = 0; - for (int i = 0; i < initThreadCount; ++i) { - auto count = perThread + (i == initThreadCount - 1 ? 
remainder : 0); - threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count)); - startItem += count; - } - for (unsigned i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - } - else { - randomx_init_dataset(dataset, cache, 0, datasetItemCount); - } - randomx_release_cache(cache); - cache = nullptr; - threads.clear(); - } - std::cout << "Memory initialized in " << sw.getElapsed() << " s" << std::endl; - std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl; - for (int i = 0; i < threadCount; ++i) { - randomx_vm *vm = randomx_create_vm(flags, cache, dataset); - if (vm == nullptr) { - if ((flags & RANDOMX_FLAG_HARD_AES)) { - throw std::runtime_error("Cannot create VM with the selected options. Try using --softAes"); - } - if (largePages) { - throw std::runtime_error("Cannot create VM with the selected options. Try without --largePages"); - } - throw std::runtime_error("Cannot create VM"); - } - vms.push_back(vm); - } - std::cout << "Running benchmark (" << noncesCount << " nonces) ..." 
<< std::endl; - sw.restart(); - if (threadCount > 1) { - for (unsigned i = 0; i < vms.size(); ++i) { - int cpuid = -1; - if (threadAffinity) - cpuid = cpuid_from_mask(threadAffinity, i); - threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid)); - } - for (unsigned i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - } - else { - func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1); - } - - double elapsed = sw.getElapsed(); - for (unsigned i = 0; i < vms.size(); ++i) - randomx_destroy_vm(vms[i]); - if (miningMode) - randomx_release_dataset(dataset); - else - randomx_release_cache(cache); - std::cout << "Calculated result: "; - result.print(std::cout); - if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: 10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1" << std::endl; - if (!miningMode) { - std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; - } - else { - std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl; - } - } - catch (MemoryException& e) { - std::cout << "ERROR: " << e.what() << std::endl; - if (largePages) { -#ifdef _WIN32 - std::cout << "To use large pages, please enable the \"Lock Pages in Memory\" policy and reboot." << std::endl; - if (!IsWindows8OrGreater()) { - std::cout << "Additionally, you have to run the benchmark from elevated command prompt." 
<< std::endl; - } -#else - std::cout << "To use large pages, please run: sudo sysctl -w vm.nr_hugepages=1250" << std::endl; -#endif - } - return 1; - } - catch (std::exception& e) { - std::cout << "ERROR: " << e.what() << std::endl; - return 1; - } - return 0; -} diff --git a/external/src/randomx/src/tests/code-generator.cpp b/external/src/randomx/src/tests/code-generator.cpp deleted file mode 100644 index b151c3a..0000000 --- a/external/src/randomx/src/tests/code-generator.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "utility.hpp" -#include "../common.hpp" -#include "../assembly_generator_x86.hpp" -#include "../superscalar.hpp" -#include "../aes_hash.hpp" -#include "../blake2/blake2.h" -#include "../program.hpp" - -const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; - -const uint8_t blockTemplate_[] = { - 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, - 0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e, - 0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca, - 0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09 -}; - -template -void generateAsm(uint32_t nonce) { - alignas(16) uint64_t hash[8]; - uint8_t blockTemplate[sizeof(blockTemplate_)]; - memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); - store32(blockTemplate + 39, nonce); - blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - uint8_t scratchpad[randomx::ScratchpadSize]; - fillAes1Rx4((void*)hash, randomx::ScratchpadSize, scratchpad); - randomx::AssemblyGeneratorX86 asmX86; - randomx::Program p; - fillAes4Rx4(hash, sizeof(p), &p); - asmX86.generateProgram(p); - 
asmX86.printCode(std::cout); -} - -template -void generateNative(uint32_t nonce) { - alignas(16) uint64_t hash[8]; - uint8_t blockTemplate[sizeof(blockTemplate_)]; - memcpy(blockTemplate, blockTemplate_, sizeof(blockTemplate)); - store32(blockTemplate + 39, nonce); - blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - uint8_t scratchpad[randomx::ScratchpadSize]; - fillAes1Rx4((void*)hash, randomx::ScratchpadSize, scratchpad); - alignas(16) randomx::Program prog; - fillAes1Rx4((void*)hash, sizeof(prog), &prog); - std::cout << prog << std::endl; -} - -void printUsage(const char* executable) { - std::cout << "Usage: " << executable << " [OPTIONS]" << std::endl; - std::cout << "Supported options:" << std::endl; - std::cout << " --softAes use software AES (default: x86 AES-NI)" << std::endl; - std::cout << " --nonce N seed nonce (default: 1000)" << std::endl; - std::cout << " --genAsm generate x86-64 asm code for nonce N" << std::endl; - std::cout << " --genNative generate RandomX code for nonce N" << std::endl; - std::cout << " --genSuperscalar generate superscalar program for nonce N" << std::endl; -} - -int main(int argc, char** argv) { - bool softAes, genAsm, genNative, genSuperscalar; - int nonce; - - readOption("--softAes", argc, argv, softAes); - readOption("--genAsm", argc, argv, genAsm); - readIntOption("--nonce", argc, argv, nonce, 1000); - readOption("--genNative", argc, argv, genNative); - readOption("--genSuperscalar", argc, argv, genSuperscalar); - - if (genSuperscalar) { - randomx::SuperscalarProgram p; - randomx::Blake2Generator gen(seed, nonce); - randomx::generateSuperscalar(p, gen); - randomx::AssemblyGeneratorX86 asmX86; - asmX86.generateAsm(p); - asmX86.printCode(std::cout); - return 0; - } - - if (genAsm) { - if (softAes) - generateAsm(nonce); - else - generateAsm(nonce); - return 0; - } - - if (genNative) { - if (softAes) - generateNative(nonce); - else - generateNative(nonce); - return 0; - } - - printUsage(argv[0]); 
- return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/jit-performance.cpp b/external/src/randomx/src/tests/jit-performance.cpp deleted file mode 100644 index 71c0169..0000000 --- a/external/src/randomx/src/tests/jit-performance.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "../aes_hash.hpp" -#include "../jit_compiler_x86.hpp" -#include "../program.hpp" -#include "utility.hpp" -#include "stopwatch.hpp" -#include "../blake2/blake2.h" -#include "../reciprocal.h" - -int main(int argc, char** argv) { - int count; - readInt(argc, argv, count, 1000000); - - const char seed[] = "JIT performance test seed"; - uint8_t hash[64]; - - blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0); - - randomx::ProgramConfiguration config; - - randomx::Program program; - randomx::JitCompilerX86 jit; - - std::cout << "Compiling " << count << " programs..." << std::endl; - - Stopwatch sw(true); - - for (int i = 0; i < count; ++i) { - fillAes1Rx4(hash, sizeof(program), &program); - auto addressRegisters = program.getEntropy(12); - config.readReg0 = 0 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg1 = 2 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg2 = 4 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg3 = 6 + (addressRegisters & 1); - jit.generateProgram(program, config); - } - - std::cout << "Elapsed: " << sw.getElapsed() << " s" << std::endl; - - dump((const char*)jit.getProgramFunc(), jit.getCodeSize(), "program.bin"); - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/perf-simulation.cpp b/external/src/randomx/src/tests/perf-simulation.cpp deleted file mode 100644 index 1068a40..0000000 --- a/external/src/randomx/src/tests/perf-simulation.cpp +++ /dev/null @@ -1,662 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "utility.hpp" -#include "../common.hpp" -#include "../aes_hash.hpp" -#include "../program.hpp" -#include "../blake2/blake2.h" -#include -#include - -int analyze(randomx::Program& p); -int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline); -int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline); - -constexpr uint32_t DST_NOP = 0; -constexpr uint32_t DST_INT = 1; -constexpr uint32_t DST_FLT = 2; -constexpr uint32_t DST_MEM = 3; -constexpr uint32_t MASK_DST = 3; - -constexpr uint32_t SRC_NOP = 0; -constexpr uint32_t SRC_INT = 4; -constexpr uint32_t SRC_FLT = 8; -constexpr uint32_t SRC_MEM = 12; -constexpr uint32_t MASK_SRC = 12; - -constexpr uint32_t OP_CFROUND = 16; -constexpr uint32_t OP_SWAP = 32; -constexpr uint32_t OP_BRANCH = 48; -constexpr uint32_t MASK_EXT = 48; - -constexpr uint32_t OP_FLOAT = 64; -constexpr uint32_t BRANCH_TARGET = 128; - -//template -void generate(randomx::Program& p, uint32_t nonce) { - alignas(16) uint64_t hash[8]; - blake2b(hash, sizeof(hash), &nonce, sizeof(nonce), nullptr, 0); - fillAes1Rx4((void*)hash, sizeof(p), &p); -} - -bool has(randomx::Instruction& instr, uint32_t mask, uint32_t prop) { - return (instr.opcode & mask) == prop; -} - -bool has(randomx::Instruction& instr, uint32_t prop) { - return (instr.opcode & prop) != 0; -} - -int main(int argc, char** argv) { - int nonces, seed, executionPorts, memoryPorts, pipeline; - bool print, reorder, speculate; - readOption("--print", argc, argv, print); - readOption("--reorder", argc, argv, reorder); - readOption("--speculate", argc, argv, speculate); - readIntOption("--nonces", argc, argv, nonces, 1); - readIntOption("--seed", argc, argv, seed, 0); - readIntOption("--executionPorts", argc, argv, executionPorts, 4); - readIntOption("--memoryPorts", argc, argv, memoryPorts, 2); - 
readIntOption("--pipeline", argc, argv, pipeline, 3); - randomx::Program p, original; - double totalCycles = 0.0; - double jumpCount = 0; - for (int i = 0; i < nonces; ++i) { - generate(original, i ^ seed); - memcpy(&p, &original, sizeof(p)); - jumpCount += analyze(p); - totalCycles += - reorder - ? - executeOutOfOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline) - : - executeInOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline); - } - totalCycles /= nonces; - jumpCount /= nonces; - std::cout << "Execution took " << totalCycles << " cycles per program" << std::endl; - //std::cout << "Jump count: " << jumpCount << std::endl; - return 0; -} - -int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) { - int cycle = pipeline - 1; - int index = 0; - int branchCount = 0; - int int_reg_ready[randomx::RegistersCount] = { 0 }; - int flt_reg_ready[randomx::RegistersCount] = { 0 }; - //each workgroup takes 1 or 2 cycles (2 cycles if any instruction has a memory operand) - while (index < RANDOMX_PROGRAM_SIZE) { - int memoryAccesses = 0; - bool hasRound = false; - int workers = 0; - //std::cout << "-----------" << std::endl; - for (; workers < executionPorts && memoryAccesses < memoryPorts && index < RANDOMX_PROGRAM_SIZE; ++workers) { - auto& instr = p(index); - auto& origi = original(index); - origi.dst %= randomx::RegistersCount; - origi.src %= randomx::RegistersCount; - - //check dependencies - if (has(instr, MASK_SRC, SRC_INT) && int_reg_ready[instr.src] > cycle) - break; - - if (has(instr, MASK_SRC, SRC_MEM) && int_reg_ready[instr.src] > cycle - 1) - break; - - if (has(instr, MASK_DST, DST_MEM) && int_reg_ready[instr.dst] > cycle - 1) - break; - - if (has(instr, MASK_DST, DST_FLT) && flt_reg_ready[instr.dst] > cycle) - break; - - if (has(instr, MASK_DST, DST_INT) && int_reg_ready[instr.dst] > cycle) - break; - - if (hasRound && 
has(instr, OP_FLOAT)) - break; - - //execute - index++; - - if (has(instr, MASK_EXT, OP_BRANCH)) { - branchCount++; - } - - if (has(instr, MASK_DST, DST_FLT)) - flt_reg_ready[instr.dst] = cycle + 1; - - if (has(instr, MASK_DST, DST_INT)) - int_reg_ready[instr.dst] = cycle + 1; - - if (has(instr, MASK_EXT, OP_SWAP)) { - int_reg_ready[instr.src] = cycle + 1; - } - - if (has(instr, MASK_EXT, OP_CFROUND)) - hasRound = true; - - if (has(instr, MASK_SRC, SRC_MEM) || has(instr, MASK_DST, DST_MEM)) { - memoryAccesses++; - } - - if (print) - std::cout << std::setw(2) << (cycle + 1) << ": " << origi; - - //non-speculative execution must stall after branch - if (!speculate && has(instr, MASK_EXT, OP_BRANCH)) { - cycle += pipeline - 1; - break; - } - } - //std::cout << " workers: " << workers << std::endl; - cycle++; - } - if (speculate) { - //account for mispredicted branches - int i = 0; - while (branchCount--) { - auto entropy = p.getEntropy(i / 8); - entropy >> (i % 8) * 8; - if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline - cycle += pipeline - 1; - } - } - return cycle; -} - -int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) { - int index = 0; - int busyExecutionPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 }; - int busyMemoryPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 }; - int int_reg_ready[randomx::RegistersCount] = { 0 }; - int flt_reg_ready[randomx::RegistersCount] = { 0 }; - int fprcReady = 0; - int lastBranch = 0; - int branchCount = 0; - for (; index < RANDOMX_PROGRAM_SIZE; ++index) { - auto& instr = p(index); - int retireCycle = pipeline - 1; - - //non-speculative execution cannot reorder across branches - if (!speculate && !has(instr, MASK_EXT, OP_BRANCH)) - retireCycle = std::max(lastBranch + pipeline - 1, retireCycle); - - //check dependencies - if (has(instr, MASK_SRC, SRC_INT)) { - retireCycle = std::max(retireCycle, int_reg_ready[instr.src]); - 
int_reg_ready[instr.src] = retireCycle; - } - - if (has(instr, MASK_SRC, SRC_MEM)) { - retireCycle = std::max(retireCycle, int_reg_ready[instr.src] + 1); - //find free memory port - while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) { - retireCycle++; - } - busyMemoryPorts[retireCycle - 1]++; - } - - if (has(instr, MASK_DST, DST_FLT)) { - retireCycle = std::max(retireCycle, flt_reg_ready[instr.dst]); - } - - if (has(instr, MASK_DST, DST_INT)) { - retireCycle = std::max(retireCycle, int_reg_ready[instr.dst]); - } - - //floating point operations depend on the fprc register - if (has(instr, OP_FLOAT)) - retireCycle = std::max(retireCycle, fprcReady); - - //execute - if (has(instr, MASK_DST, DST_MEM)) { - retireCycle = std::max(retireCycle, int_reg_ready[instr.dst] + 1); - //find free memory port - while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) { - retireCycle++; - } - busyMemoryPorts[retireCycle - 1]++; - retireCycle++; - } - - if (has(instr, MASK_DST, DST_FLT)) { - //find free execution port - do { - retireCycle++; - } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); - busyExecutionPorts[retireCycle - 1]++; - flt_reg_ready[instr.dst] = retireCycle; - } - - if (has(instr, MASK_DST, DST_INT)) { - //find free execution port - do { - retireCycle++; - } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); - busyExecutionPorts[retireCycle - 1]++; - int_reg_ready[instr.dst] = retireCycle; - } - - if (has(instr, MASK_EXT, OP_SWAP)) { - int_reg_ready[instr.src] = retireCycle; - } - - if (has(instr, MASK_EXT, OP_CFROUND)) { - do { - retireCycle++; - } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); - busyExecutionPorts[retireCycle - 1]++; - fprcReady = retireCycle; - } - - if (has(instr, MASK_EXT, OP_BRANCH)) { - /*if (!speculate && instr.mod == 1) { //simulated predication - do { - retireCycle++; - } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); - busyExecutionPorts[retireCycle - 1]++; - 
int_reg_ready[instr.dst] = retireCycle; - }*/ - //else { - lastBranch = std::max(lastBranch, retireCycle); - branchCount++; - //} - } - - //print - auto& origi = original(index); - origi.dst %= randomx::RegistersCount; - origi.src %= randomx::RegistersCount; - if (print) { - std::cout << std::setw(2) << retireCycle << ": " << origi; - if (has(instr, MASK_EXT, OP_BRANCH)) { - std::cout << " jump: " << (int)instr.mod << std::endl; - } - } - } - int cycle = 0; - for (int i = 0; i < randomx::RegistersCount; ++i) { - cycle = std::max(cycle, int_reg_ready[i]); - } - for (int i = 0; i < randomx::RegistersCount; ++i) { - cycle = std::max(cycle, flt_reg_ready[i]); - } - if (speculate) { - //account for mispredicted branches - int i = 0; - while (branchCount--) { - auto entropy = p.getEntropy(i / 8); - entropy >> (i % 8) * 8; - if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline - cycle += pipeline - 1; - } - } - return cycle; -} - -#include "../bytecode_machine.hpp" - -//old register selection -struct RegisterUsage { - int32_t lastUsed; - int32_t count; -}; - -inline int getConditionRegister(RegisterUsage(®isterUsage)[randomx::RegistersCount]) { - int min = INT_MAX; - int minCount = 0; - int minIndex; - //prefer registers that have been used as a condition register fewer times - for (unsigned i = 0; i < randomx::RegistersCount; ++i) { - if (registerUsage[i].lastUsed < min || (registerUsage[i].lastUsed == min && registerUsage[i].count < minCount)) { - min = registerUsage[i].lastUsed; - minCount = registerUsage[i].count; - minIndex = i; - } - } - return minIndex; -} - -int analyze(randomx::Program& p) { - int jumpCount = 0; - RegisterUsage registerUsage[randomx::RegistersCount]; - for (unsigned i = 0; i < randomx::RegistersCount; ++i) { - registerUsage[i].lastUsed = -1; - registerUsage[i].count = 0; - } - for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { - auto& instr = p(i); - int opcode = instr.opcode; - instr.opcode = 0; - - if (opcode < 
randomx::ceil_IADD_RS) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_INT; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IADD_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_ISUB_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_ISUB_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IMUL_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IMUL_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IMULH_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IMULH_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_ISMULH_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_ISMULH_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IMUL_RCP) { - uint64_t divisor = instr.getImm32(); - if (!randomx::isZeroOrPowerOf2(divisor)) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - } - continue; - } - - if (opcode < randomx::ceil_INEG_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IXOR_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IXOR_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IROR_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_IROL_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - continue; - } - - if (opcode < randomx::ceil_ISWAP_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - if (instr.src != instr.dst) { - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - instr.opcode |= OP_SWAP; - registerUsage[instr.dst].lastUsed = i; - registerUsage[instr.src].lastUsed = i; - } - continue; - } - - if (opcode < randomx::ceil_FSWAP_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_FLT; - continue; - } - - if (opcode < randomx::ceil_FADD_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - continue; - } - - if (opcode < randomx::ceil_FADD_M) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - continue; - } - - if (opcode < randomx::ceil_FSUB_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - continue; - } - - if (opcode < randomx::ceil_FSUB_M) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - continue; - } - - if (opcode < randomx::ceil_FSCAL_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - continue; - } - - if (opcode < randomx::ceil_FMUL_R) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - continue; - } - - if (opcode < randomx::ceil_FDIV_M) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - continue; - } - - if (opcode < randomx::ceil_FSQRT_R) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - continue; - } - - if (opcode < randomx::ceil_CBRANCH) { - instr.opcode |= OP_BRANCH; - instr.opcode |= DST_INT; - int reg = instr.dst % randomx::RegistersCount; - int target = registerUsage[reg].lastUsed; - int offset = (i - target); - instr.mod = offset; - jumpCount += offset; - p(target + 1).opcode |= BRANCH_TARGET; - registerUsage[reg].count++; - instr.dst = reg; - //mark all registers as used - for (unsigned j = 0; j < randomx::RegistersCount; ++j) { - registerUsage[j].lastUsed = i; - } - continue; - } - - if (opcode < randomx::ceil_CFROUND) { - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_INT; - instr.opcode |= OP_CFROUND; - continue; - } - - if (opcode < randomx::ceil_ISTORE) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_MEM; - if (instr.getModCond() < randomx::StoreL3Condition) - instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - else - instr.imm32 &= randomx::ScratchpadL3Mask; - continue; - } - - if (opcode < randomx::ceil_NOP) { - - } - } - return jumpCount; -} diff --git a/external/src/randomx/src/tests/rng-tests.cpp b/external/src/randomx/src/tests/rng-tests.cpp deleted file mode 100644 index fed4761..0000000 --- a/external/src/randomx/src/tests/rng-tests.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - cd ~ - wget http://simul.iro.umontreal.ca/testu01/TestU01.zip - unzip TestU01.zip - mkdir TestU01 - cd TestU01-1.2.3 - ./configure --prefix=`pwd`/../TestU01 - make -j8 - make install - cd ~/RandomX - g++ -O3 src/tests/rng-tests.cpp -lm -I ~/TestU01/include -L ~/TestU01/lib -L bin/ -l:libtestu01.a -l:libmylib.a -l:libprobdist.a -lrandomx -o bin/rng-tests -DRANDOMX_GEN=4R -DRANDOMX_TESTU01=Crush - bin/rng-tests 0 -*/ - -extern "C" { - #include "unif01.h" - #include "bbattery.h" -} - -#include "../aes_hash.hpp" -#include "../blake2/blake2.h" -#include "utility.hpp" -#include - -#ifndef RANDOMX_GEN -#error Please define RANDOMX_GEN with a value of 1R or 4R -#endif - -#ifndef RANDOMX_TESTU01 -#error Please define RANDOMX_TESTU01 with a value of SmallCrush, Crush or BigCrush -#endif - -#define STR(x) #x -#define CONCAT(a,b,c) a ## b ## c -#define GEN_NAME(x) "AesGenerator" STR(x) -#define GEN_FUNC(x) CONCAT(fillAes, x, x4) -#define TEST_SUITE(x) CONCAT(bbattery_, x,) - -constexpr int GeneratorStateSize = 64; -constexpr int GeneratorCapacity = GeneratorStateSize / sizeof(uint32_t); - -static unsigned long aesGenBits(void *param, void *state) { - uint32_t* statePtr = (uint32_t*)state; - int* indexPtr = (int*)param; - int stateIndex = *indexPtr; - if(stateIndex >= GeneratorCapacity) { - GEN_FUNC(RANDOMX_GEN)(statePtr, GeneratorStateSize, statePtr); - stateIndex = 0; - } - uint32_t next = statePtr[stateIndex]; - *indexPtr = stateIndex + 1; - return next; -} - -static double aesGenDouble(void *param, void *state) { - return aesGenBits (param, state) 
/ unif01_NORM32; -} - -static void aesWriteState(void* state) { - char* statePtr = (char*)state; - for(int i = 0; i < 4; ++i) { - std::cout << "state" << i << " = "; - outputHex(std::cout, statePtr + (i * 16), 16); - std::cout << std::endl; - } -} - -int main(int argc, char** argv) { - if (argc != 2) { - std::cout << argv[0] << " " << std::endl; - return 1; - } - uint32_t state[GeneratorCapacity] = { 0 }; - int stateIndex = GeneratorCapacity; - char name[] = GEN_NAME(RANDOMX_GEN); - uint64_t seed = strtoull(argv[1], nullptr, 0); - if(seed) { - blake2b(&state, sizeof(state), &seed, sizeof(seed), nullptr, 0); - } - unif01_Gen gen; - gen.state = &state; - gen.param = &stateIndex; - gen.Write = &aesWriteState; - gen.GetU01 = &aesGenDouble; - gen.GetBits = &aesGenBits; - gen.name = (char*)name; - - gen.Write(gen.state); - std::cout << std::endl; - - TEST_SUITE(RANDOMX_TESTU01)(&gen); - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/runtime-distr.cpp b/external/src/randomx/src/tests/runtime-distr.cpp deleted file mode 100644 index b7663d0..0000000 --- a/external/src/randomx/src/tests/runtime-distr.cpp +++ /dev/null @@ -1,172 +0,0 @@ - -#include -#include "utility.hpp" -#include "stopwatch.hpp" -#include "../dataset.hpp" -#include "../vm_compiled.hpp" -#include "../blake2/blake2.h" - -struct Outlier { - Outlier(int idx, double rtime) : index(idx), runtime(rtime) {} - int index; - double runtime; -}; - -int main(int argc, char** argv) { - constexpr int distributionSize = 100; - int distribution[distributionSize + 1] = { 0 }; - Stopwatch sw; - alignas(16) uint64_t hash[8]; - - uint64_t checksum = 0; - double totalRuntime = 0; - double maxRuntime = 0; - std::vector outliers; - outliers.reserve(25); - randomx_flags flags = RANDOMX_FLAG_DEFAULT; - - bool softAes, largePages, jit, verify; - int totalCount, initThreadCount; - double binSize, offset; - int32_t seed; - - readOption("--verify", argc, argv, verify); - readOption("--jit", argc, 
argv, jit); - readOption("--softAes", argc, argv, softAes); - readIntOption("--nonces", argc, argv, totalCount, 10000); - readIntOption("--init", argc, argv, initThreadCount, 1); - readFloatOption("--binSize", argc, argv, binSize, 1e-3); - readFloatOption("--offset", argc, argv, offset, 0); - readIntOption("--seed", argc, argv, seed, 0); - readOption("--largePages", argc, argv, largePages); - - if (!verify) { - flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM); - std::cout << "Measure program runtime" << std::endl; - } - else { - std::cout << "Measure verification time" << std::endl; - } - - std::cout << " - histogram offset: " << offset << std::endl; - std::cout << " - histogram bin size: " << binSize << std::endl; - - if (jit) { - flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT); - std::cout << " - JIT compiled mode" << std::endl; - } - else { - std::cout << " - interpreted mode" << std::endl; - } - - if (softAes) { - std::cout << " - software AES mode" << std::endl; - } - else { - flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES); - std::cout << " - hardware AES mode" << std::endl; - } - - if (largePages) { - flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES); - std::cout << " - large pages mode" << std::endl; - } - else { - std::cout << " - small pages mode" << std::endl; - } - - std::cout << "Initializing..." 
<< std::endl; - - randomx_cache *cache = randomx_alloc_cache(flags); - randomx_dataset *dataset = nullptr; - if (cache == nullptr) { - std::cout << "Cache allocation failed" << std::endl; - return 1; - } - randomx_init_cache(cache, &seed, sizeof seed); - - if (!verify) { - blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0); - - dataset = randomx_alloc_dataset(flags); - if (dataset == nullptr) { - std::cout << "Dataset allocation failed" << std::endl; - return 1; - } - - std::vector threads; - uint32_t datasetItemCount = randomx_dataset_item_count(); - if (initThreadCount > 1) { - auto perThread = datasetItemCount / initThreadCount; - auto remainder = datasetItemCount % initThreadCount; - uint32_t startItem = 0; - for (int i = 0; i < initThreadCount; ++i) { - auto count = perThread + (i == initThreadCount - 1 ? remainder : 0); - threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count)); - startItem += count; - } - for (unsigned i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - } - else { - randomx_init_dataset(dataset, cache, 0, datasetItemCount); - } - randomx_release_cache(cache); - cache = nullptr; - } - - std::cout << "Running " << totalCount << " programs..." 
<< std::endl; - - randomx_vm* vm = randomx_create_vm(flags, cache, dataset); - - if (!verify) { - vm->initScratchpad(&hash); - vm->resetRoundingMode(); - } - - for (int i = 0; i < totalCount; ++i) { - sw.restart(); - if (verify) - randomx_calculate_hash(vm, &i, sizeof i, &hash); - else - vm->run(&hash); - double elapsed = sw.getElapsed(); - //std::cout << "Elapsed: " << elapsed << std::endl; - totalRuntime += elapsed; - if (elapsed > maxRuntime) - maxRuntime = elapsed; - int bin = (elapsed - offset) / binSize; - bool outlier = false; - if (bin < 0) { - bin = 0; - outlier = true; - } - if (bin > distributionSize) { - bin = distributionSize; - outlier = true; - } - if (outlier && outliers.size() < outliers.capacity()) - outliers.push_back(Outlier(i, elapsed)); - distribution[bin]++; - if(!verify) - blake2b(hash, sizeof(hash), vm->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - checksum ^= hash[0]; - } - - for (int i = 0; i < distributionSize + 1; ++i) { - std::cout << i << " " << distribution[i] << std::endl; - } - - std::cout << "Average runtime: " << totalRuntime / totalCount << std::endl; - std::cout << "Maximum runtime: " << maxRuntime << std::endl; - std::cout << "Checksum: " << checksum << std::endl; - - std::cout << "Outliers: " << std::endl; - - for (Outlier& ol : outliers) { - std::cout << " " << ol.index << ": " << ol.runtime << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/scratchpad-entropy.cpp b/external/src/randomx/src/tests/scratchpad-entropy.cpp deleted file mode 100644 index ecb3c7d..0000000 --- a/external/src/randomx/src/tests/scratchpad-entropy.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include -#include "utility.hpp" -#include "../randomx.h" -#include "../virtual_machine.hpp" -#include "../blake2/endian.h" - -/* - Writes final scratchpads to disk as files with .spad extension, each file is 2048 KiB. 
- Command line parameters: - --count N number of files to generate (default = 1) - --seed S different seed will give different outputs (default = 0) - - Entropy can be estimated by compressing the files using 7zip in Ultra mode: - - 7z.exe a -t7z -m0=lzma2 -mx=9 scratchpads.7z *.spad -*/ - -int main(int argc, char** argv) { - int count, seedValue; - - readIntOption("--count", argc, argv, count, 1); - readIntOption("--seed", argc, argv, seedValue, 0); - - std::cout << "Generating " << count << " scratchpad(s) using seed " << seedValue << " ..." << std::endl; - - char seed[4]; - char input[4]; - char hash[RANDOMX_HASH_SIZE]; - - store32(&seed, seedValue); - - randomx_cache *cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); - randomx_init_cache(cache, &seed, sizeof seed); - randomx_vm *vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, NULL); - - for (int i = 0; i < count; ++i) { - store32(&input, i); - randomx_calculate_hash(vm, &input, sizeof input, hash); - std::string filename("test-"); - filename += std::to_string(i); - filename += ".spad"; - dump((const char*)vm->getScratchpad(), randomx::ScratchpadSize, filename.c_str()); - } - - randomx_destroy_vm(vm); - randomx_release_cache(cache); - - return 0; -} diff --git a/external/src/randomx/src/tests/stopwatch.hpp b/external/src/randomx/src/tests/stopwatch.hpp deleted file mode 100644 index d1e4912..0000000 --- a/external/src/randomx/src/tests/stopwatch.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include - -class Stopwatch { -public: - Stopwatch(bool startNow = false) { - reset(); - if (startNow) { - start(); - } - } - void reset() { - isRunning = false; - elapsed = 0; - } - void start() { - if (!isRunning) { - startMark = std::chrono::high_resolution_clock::now(); - isRunning = true; - } - } - void restart() { - startMark = std::chrono::high_resolution_clock::now(); - isRunning = true; - elapsed = 0; - } - void stop() { - if (isRunning) { - chrono_t endMark = std::chrono::high_resolution_clock::now(); - uint64_t ns = std::chrono::duration_cast(endMark - startMark).count(); - elapsed += ns; - isRunning = false; - } - } - double getElapsed() const { - return getElapsedNanosec() / 1e+9; - } -private: - using chrono_t = std::chrono::high_resolution_clock::time_point; - using sw_unit = std::chrono::nanoseconds; - chrono_t startMark; - uint64_t elapsed; - bool isRunning; - - uint64_t getElapsedNanosec() const { - uint64_t elns = elapsed; - if (isRunning) { - chrono_t endMark = std::chrono::high_resolution_clock::now(); - uint64_t ns = std::chrono::duration_cast(endMark - startMark).count(); - elns += ns; - } - return elns; - } -}; \ No newline at end of file diff --git a/external/src/randomx/src/tests/superscalar-avalanche.cpp b/external/src/randomx/src/tests/superscalar-avalanche.cpp deleted file mode 100644 index d9f916c..0000000 --- a/external/src/randomx/src/tests/superscalar-avalanche.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#include -#include "../superscalar.hpp" -#include "../intrin_portable.h" - -const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; - -int main() { - - int insensitiveProgCount[64] = { 0 }; - std::vector dummy; - for (int bit = 0; bit < 64; ++bit) { - for (int i = 0; i < 10000; ++i) { - uint64_t ra[8] = { - 6364136223846793005ULL, - 9298410992540426748ULL, - 
12065312585734608966ULL, - 9306329213124610396ULL, - 5281919268842080866ULL, - 10536153434571861004ULL, - 3398623926847679864ULL, - 9549104520008361294ULL, - }; - uint64_t rb[8]; - memcpy(rb, ra, sizeof rb); - rb[0] ^= (1ULL << bit); - randomx::SuperscalarProgram p; - randomx::Blake2Generator gen(seed, sizeof seed, i); - randomx::generateSuperscalar(p, gen); - randomx::executeSuperscalar(ra, p, nullptr); - randomx::executeSuperscalar(rb, p, nullptr); - uint64_t diff = 0; - for (int j = 0; j < 8; ++j) { - diff += __popcnt64(ra[j] ^ rb[j]); - } - if (diff < 192 || diff > 320) { - std::cout << "Seed: " << i << " diff = " << diff << std::endl; - insensitiveProgCount[bit]++; - } - } - } - for (int bit = 0; bit < 64; ++bit) { - std::cout << bit << " " << insensitiveProgCount[bit] << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/superscalar-init.cpp b/external/src/randomx/src/tests/superscalar-init.cpp deleted file mode 100644 index 15554bb..0000000 --- a/external/src/randomx/src/tests/superscalar-init.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include -#include -#include -#include -#include "../superscalar.hpp" -#include "../common.hpp" - -int main() { - std::cout << "THIS PROGRAM REQUIRES MORE THAN 16 GB OF RAM TO COMPLETE" << std::endl; - std::vector dummy; - constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; - constexpr uint64_t superscalarAdd1 = 0x810A978A59F5A1FC; //9298410992540426748ULL; //9298410992540426048ULL - constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; - constexpr uint64_t superscalarAdd3 = 0x8126B91CBF22495C; //9306329213124610396ULL; - constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; - constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; - constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; - constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; - constexpr uint32_t totalItems = randomx::DatasetSize / randomx::CacheLineSize; - 
std::unordered_set registerValues; - registerValues.reserve(totalItems); - registerValues.rehash(totalItems); - int collisionCount[9] = { 0 }; - for (uint32_t itemNumber = 0; itemNumber < totalItems; ++itemNumber) { - uint64_t rl[8]; - rl[0] = (itemNumber + 1) * superscalarMul0; - rl[1] = rl[0] ^ superscalarAdd1; - rl[2] = rl[0] ^ superscalarAdd2; - rl[3] = rl[0] ^ superscalarAdd3; - rl[4] = rl[0] ^ superscalarAdd4; - rl[5] = rl[0] ^ superscalarAdd5; - rl[6] = rl[0] ^ superscalarAdd6; - rl[7] = rl[0] ^ superscalarAdd7; - int blockCollisions = 0; - for (int i = 0; i < 8; ++i) { - uint64_t reducedValue = rl[i] & 0x3FFFFFFFFFFFF8; //bits 3-53 only - if (registerValues.find(reducedValue) != registerValues.end()) { - blockCollisions++; - std::cout << "Item " << itemNumber << ": collision of register r" << i << std::endl; - } - else { - registerValues.insert(reducedValue); - } - } - collisionCount[blockCollisions]++; - if ((itemNumber % (320 * 1024)) == 0) - std::cout << "Item " << itemNumber << " processed" << std::endl; - } - - for (int i = 0; i < 9; ++i) { - std::cout << i << " register(s) collide in " << collisionCount[i] << " items" << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/superscalar-stats.cpp b/external/src/randomx/src/tests/superscalar-stats.cpp deleted file mode 100644 index 50924e5..0000000 --- a/external/src/randomx/src/tests/superscalar-stats.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include "../superscalar.hpp" -#include "../blake2_generator.hpp" - -const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; - -int main() { - - constexpr int count = 1000000; - int isnCounts[(int)randomx::SuperscalarInstructionType::COUNT] = { 0 }; - int64_t asicLatency = 0; - int64_t codesize = 0; - int64_t cpuLatency = 0; - int64_t macroOps = 0; - int64_t mulCount = 0; - 
int64_t size = 0; - for (int i = 0; i < count; ++i) { - randomx::SuperscalarProgram prog; - randomx::Blake2Generator gen(seed, sizeof(seed), i); - randomx::generateSuperscalar(prog, gen); - asicLatency += prog.asicLatency; - codesize += prog.codeSize; - cpuLatency += prog.cpuLatency; - macroOps += prog.macroOps; - mulCount += prog.mulCount; - size += prog.getSize(); - - for (unsigned j = 0; j < prog.getSize(); ++j) { - isnCounts[prog(j).opcode]++; - } - - if ((i + 1) % (count / 100) == 0) { - std::cout << "Completed " << ((i + 1) / (count / 100)) << "% ..." << std::endl; - } - } - - std::cout << "Avg. IPC: " << (macroOps / (double)cpuLatency) << std::endl; - std::cout << "Avg. ASIC latency: " << (asicLatency / (double)count) << std::endl; - std::cout << "Avg. CPU latency: " << (cpuLatency / (double)count) << std::endl; - std::cout << "Avg. code size: " << (codesize / (double)count) << std::endl; - std::cout << "Avg. x86 ops: " << (macroOps / (double)count) << std::endl; - std::cout << "Avg. mul. count: " << (mulCount / (double)count) << std::endl; - std::cout << "Avg. 
RandomX ops: " << (size / (double)count) << std::endl; - - std::cout << "Frequencies: " << std::endl; - for (unsigned j = 0; j < (int)randomx::SuperscalarInstructionType::COUNT; ++j) { - std::cout << j << " " << isnCounts[j] << " " << isnCounts[j] / (double)size << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/external/src/randomx/src/tests/tests.cpp b/external/src/randomx/src/tests/tests.cpp deleted file mode 100644 index 412585b..0000000 --- a/external/src/randomx/src/tests/tests.cpp +++ /dev/null @@ -1,1096 +0,0 @@ -#ifdef NDEBUG -#undef NDEBUG -#endif - -#include -#include -#include "utility.hpp" -#include "../bytecode_machine.hpp" -#include "../dataset.hpp" -#include "../blake2/endian.h" -#include "../blake2/blake2.h" -#include "../blake2_generator.hpp" -#include "../superscalar.hpp" -#include "../reciprocal.h" -#include "../intrin_portable.h" -#include "../jit_compiler.hpp" -#include "../aes_hash.hpp" - -randomx_cache* cache; -randomx_vm* vm = nullptr; - -template -void initCache(const char (&key)[N]) { - assert(cache != nullptr); - randomx_init_cache(cache, key, N - 1); - if (vm != nullptr) - randomx_vm_set_cache(vm, cache); -} - -template -void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) { - initCache(key); - assert(vm != nullptr); - randomx_calculate_hash(vm, input, H - 1, output); -} - -template -void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) { - initCache(key); - assert(vm != nullptr); - char input[H / 2]; - hex2bin((char*)hex, H - 1, input); - randomx_calculate_hash(vm, input, sizeof(input), output); -} - -int testNo = 0; -int skipped = 0; - -template -void runTest(const char* name, bool condition, FUNC f) { - std::cout << "["; - std::cout.width(2); - std::cout << std::right << ++testNo << "] "; - std::cout.width(40); - std::cout << std::left << name << " ... 
"; - std::cout.flush(); - if (condition) { - f(); - std::cout << "PASSED" << std::endl; - } - else { - std::cout << "SKIPPED" << std::endl; - skipped++; - } -} - -int main() { - char testHash[32]; - - //std::cout << "Allocating randomx_cache..." << std::endl; - cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); - - runTest("Cache initialization", RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - initCache("test key 000"); - uint64_t* cacheMemory = (uint64_t*)cache->memory; - assert(cacheMemory[0] == 0x191e0e1d23c02186); - assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1); - assert(cacheMemory[33554431] == 0x1f47f056d05cd99b); - }); - - runTest("SuperscalarHash generator", RANDOMX_SUPERSCALAR_LATENCY == 170, []() { - char sprogHash[32]; - randomx::SuperscalarProgram sprog; - const char key[] = "test key 000"; - constexpr size_t keySize = sizeof(key) - 1; - randomx::Blake2Generator gen(key, keySize); - - const char superscalarReferences[10][65] = { - "d3a4a6623738756f77e6104469102f082eff2a3e60be7ad696285ef7dfc72a61", - "f5e7e0bbc7e93c609003d6359208688070afb4a77165a552ff7be63b38dfbc86", - "85ed8b11734de5b3e9836641413a8f36e99e89694f419c8cd25c3f3f16c40c5a", - "5dd956292cf5d5704ad99e362d70098b2777b2a1730520be52f772ca48cd3bc0", - "6f14018ca7d519e9b48d91af094c0f2d7e12e93af0228782671a8640092af9e5", - "134be097c92e2c45a92f23208cacd89e4ce51f1009a0b900dbe83b38de11d791", - "268f9392c20c6e31371a5131f82bd7713d3910075f2f0468baafaa1abd2f3187", - "c668a05fd909714ed4a91e8d96d67b17e44329e88bc71e0672b529a3fc16be47", - "99739351315840963011e4c5d8e90ad0bfed3facdcb713fe8f7138fbf01c4c94", - "14ab53d61880471f66e80183968d97effd5492b406876060e595fcf9682f9295", - }; - - for (int i = 0; i < 10; ++i) { - randomx::generateSuperscalar(sprog, gen); - blake2b(sprogHash, sizeof(sprogHash), &sprog.programBuffer, sizeof(randomx::Instruction) * sprog.getSize(), nullptr, 0); - 
assert(equalsHex(sprogHash, superscalarReferences[i])); - } - }); - - runTest("randomx_reciprocal", true, []() { - assert(randomx_reciprocal(3) == 12297829382473034410U); - assert(randomx_reciprocal(13) == 11351842506898185609U); - assert(randomx_reciprocal(33) == 17887751829051686415U); - assert(randomx_reciprocal(65537) == 18446462603027742720U); - assert(randomx_reciprocal(15000001) == 10316166306300415204U); - assert(randomx_reciprocal(3845182035) == 10302264209224146340U); - assert(randomx_reciprocal(0xffffffff) == 9223372039002259456U); - }); - - runTest("randomx_reciprocal_fast", RANDOMX_HAVE_FAST_RECIPROCAL, []() { - assert(randomx_reciprocal_fast(3) == 12297829382473034410U); - assert(randomx_reciprocal_fast(13) == 11351842506898185609U); - assert(randomx_reciprocal_fast(33) == 17887751829051686415U); - assert(randomx_reciprocal_fast(65537) == 18446462603027742720U); - assert(randomx_reciprocal_fast(15000001) == 10316166306300415204U); - assert(randomx_reciprocal_fast(3845182035) == 10302264209224146340U); - assert(randomx_reciprocal_fast(0xffffffff) == 9223372039002259456U); - }); - - runTest("Dataset initialization (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - initCache("test key 000"); - uint64_t datasetItem[8]; - randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 0); - assert(datasetItem[0] == 0x680588a85ae222db); - randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 10000000); - assert(datasetItem[0] == 0x7943a1f6186ffb72); - randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 20000000); - assert(datasetItem[0] == 0x9035244d718095e1); - randomx::initDatasetItem(cache, (uint8_t*)&datasetItem, 30000000); - assert(datasetItem[0] == 0x145a5091f7853099); - }); - - runTest("Dataset initialization (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - initCache("test key 000"); - randomx::JitCompiler jit; - jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache); - 
jit.generateDatasetInitCode(); -#ifdef RANDOMX_FORCE_SECURE - jit.enableExecution(); -#else - jit.enableAll(); -#endif - uint64_t datasetItem[8]; - jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 0, 1); - assert(datasetItem[0] == 0x680588a85ae222db); - jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 10000000, 10000001); - assert(datasetItem[0] == 0x7943a1f6186ffb72); - jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 20000000, 20000001); - assert(datasetItem[0] == 0x9035244d718095e1); - jit.getDatasetInitFunc()(cache, (uint8_t*)&datasetItem, 30000000, 30000001); - assert(datasetItem[0] == 0x145a5091f7853099); - }); - - runTest("AesGenerator1R", true, []() { - char state[64] = { 0 }; - hex2bin("6c19536eb2de31b6c0065f7f116e86f960d8af0c57210a6584c3237b9d064dc7", 64, state); - fillAes1Rx4(state, sizeof(state), state); - assert(equalsHex(state, "fa89397dd6ca422513aeadba3f124b5540324c4ad4b6db434394307a17c833ab")); - }); - - randomx::NativeRegisterFile reg; - randomx::BytecodeMachine decoder; - randomx::InstructionByteCode ibc; - alignas(16) randomx::ProgramConfiguration config; - constexpr int registerHigh = 192; - constexpr int registerDst = 0; - constexpr int registerSrc = 1; - int pc = 0; - constexpr uint32_t imm32 = 3234567890; - constexpr uint64_t imm64 = signExtend2sCompl(imm32); - - decoder.beginCompilation(reg); - - runTest("IADD_RS (decode)", RANDOMX_FREQ_IADD_RS > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IADD_RS - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.mod = UINT8_MAX; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IADD_RS); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.shift == 3); - assert(ibc.imm == 0); - }); - - runTest("IADD_RS (execute)", RANDOMX_FREQ_IADD_RS > 0, [&] { - reg.r[registerDst] = 0x8000000000000000; - reg.r[registerSrc] 
= 0x1000000000000000; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0); - }); - - runTest("IADD_RS with immediate (decode)", RANDOMX_FREQ_IADD_RS > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IADD_RS - 1; - instr.mod = 8; - instr.dst = registerHigh | randomx::RegisterNeedsDisplacement; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IADD_RS); - assert(ibc.idst == ®.r[randomx::RegisterNeedsDisplacement]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.shift == 2); - assert(ibc.imm == imm64); - }); - - runTest("IADD_RS with immediate (decode)", RANDOMX_FREQ_IADD_RS > 0, [&] { - reg.r[randomx::RegisterNeedsDisplacement] = 0x8000000000000000; - reg.r[registerSrc] = 0x2000000000000000; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[randomx::RegisterNeedsDisplacement] == imm64); - }); - - runTest("IADD_M (decode)", RANDOMX_FREQ_IADD_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IADD_M - 1; - instr.mod = 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IADD_M); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("ISUB_R (decode)", RANDOMX_FREQ_ISUB_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISUB_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISUB_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("ISUB_R (execute)", 
RANDOMX_FREQ_ISUB_R > 0, [&] { - reg.r[registerDst] = 1; - reg.r[registerSrc] = 0xFFFFFFFF; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0xFFFFFFFF00000002); - }); - - runTest("ISUB_R with immediate (decode)", RANDOMX_FREQ_ISUB_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISUB_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISUB_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == &ibc.imm); - }); - - runTest("ISUB_R with immediate (decode)", RANDOMX_FREQ_ISUB_R > 0, [&] { - reg.r[registerDst] = 0; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == (~imm64 + 1)); - }); - - runTest("ISUB_M (decode)", RANDOMX_FREQ_ISUB_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISUB_M - 1; - instr.mod = 0; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISUB_M); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL2Mask); - }); - - runTest("IMUL_R (decode)", RANDOMX_FREQ_IMUL_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMUL_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMUL_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("IMUL_R (execute)", RANDOMX_FREQ_IMUL_R > 0, [&] { - reg.r[registerDst] = 0xBC550E96BA88A72B; - reg.r[registerSrc] = 0xF5391FA9F18D6273; - decoder.executeInstruction(ibc, 
pc, nullptr, config); - assert(reg.r[registerDst] == 0x28723424A9108E51); - }); - - runTest("IMUL_R with immediate (decode)", RANDOMX_FREQ_IMUL_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMUL_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMUL_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == &ibc.imm); - }); - - runTest("IMUL_R with immediate (execute)", RANDOMX_FREQ_IMUL_R > 0, [&] { - reg.r[registerDst] = 1; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == imm64); - }); - - runTest("IMUL_M (decode)", RANDOMX_FREQ_IMUL_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMUL_M - 1; - instr.mod = 0; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMUL_M); - assert(ibc.idst == ®.r[registerDst]); - assert(*ibc.isrc == 0); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL3Mask); - }); - - runTest("IMULH_R (decode)", RANDOMX_FREQ_IMULH_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMULH_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMULH_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("IMULH_R (execute)", RANDOMX_FREQ_IMULH_R > 0, [&] { - reg.r[registerDst] = 0xBC550E96BA88A72B; - reg.r[registerSrc] = 0xF5391FA9F18D6273; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0xB4676D31D2B34883); - }); - - runTest("IMULH_R squared (decode)", 
RANDOMX_FREQ_IMULH_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMULH_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMULH_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerDst]); - }); - - runTest("IMULH_M (decode)", RANDOMX_FREQ_IMULH_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMULH_M - 1; - instr.mod = 0; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMULH_M); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL2Mask); - }); - - runTest("ISMULH_R (decode)", RANDOMX_FREQ_ISMULH_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISMULH_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISMULH_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("ISMULH_R (execute)", RANDOMX_FREQ_ISMULH_R > 0, [&] { - reg.r[registerDst] = 0xBC550E96BA88A72B; - reg.r[registerSrc] = 0xF5391FA9F18D6273; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0x02D93EF1269D3EE5); - }); - - runTest("ISMULH_R squared (decode)", RANDOMX_FREQ_ISMULH_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISMULH_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == 
randomx::InstructionType::ISMULH_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerDst]); - }); - - runTest("ISMULH_M (decode)", RANDOMX_FREQ_ISMULH_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISMULH_M - 1; - instr.mod = 3; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISMULH_M); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("IMUL_RCP (decode)", RANDOMX_FREQ_IMUL_RCP > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMUL_RCP - 1; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IMUL_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == &ibc.imm); - assert(ibc.imm == randomx_reciprocal(imm32)); - }); - - runTest("IMUL_RCP zero imm32 (decode)", RANDOMX_FREQ_IMUL_RCP > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IMUL_RCP - 1; - instr.setImm32(0); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::NOP); - }); - - runTest("INEG_R (decode)", RANDOMX_FREQ_INEG_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_INEG_R - 1; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::INEG_R); - assert(ibc.idst == ®.r[registerDst]); - }); - - runTest("INEG_R (execute)", RANDOMX_FREQ_INEG_R > 0, [&] { - reg.r[registerDst] = 0xFFFFFFFFFFFFFFFF; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 1); - }); - - runTest("IXOR_R (decode)", RANDOMX_FREQ_IXOR_R > 0, [&] { - 
randomx::Instruction instr; - instr.opcode = randomx::ceil_IXOR_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IXOR_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("IXOR_R (execute)", RANDOMX_FREQ_IMUL_R > 0, [&] { - reg.r[registerDst] = 0x8888888888888888; - reg.r[registerSrc] = 0xAAAAAAAAAAAAAAAA; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0x2222222222222222); - }); - - runTest("IXOR_R with immediate (decode)", RANDOMX_FREQ_IXOR_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IXOR_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IXOR_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == &ibc.imm); - }); - - runTest("IXOR_R with immediate (execute)", RANDOMX_FREQ_IXOR_R > 0, [&] { - reg.r[registerDst] = 0xFFFFFFFFFFFFFFFF; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == ~imm64); - }); - - runTest("IXOR_M (decode)", RANDOMX_FREQ_IXOR_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IXOR_M - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IXOR_M); - assert(ibc.idst == ®.r[registerDst]); - assert(*ibc.isrc == 0); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL3Mask); - }); - - runTest("IROR_R (decode)", RANDOMX_FREQ_IROR_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IROR_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | 
registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IROR_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("IROR_R (execute)", RANDOMX_FREQ_IROR_R > 0, [&] { - reg.r[registerDst] = 953360005391419562; - reg.r[registerSrc] = 4569451684712230561; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 0xD835C455069D81EF); - }); - - runTest("IROL_R (decode)", RANDOMX_FREQ_IROL_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_IROL_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::IROL_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("IROL_R (execute)", RANDOMX_FREQ_IROL_R > 0, [&] { - reg.r[registerDst] = 953360005391419562; - reg.r[registerSrc] = 4569451684712230561; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 6978065200552740799); - }); - - runTest("ISWAP_R (decode)", RANDOMX_FREQ_ISWAP_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISWAP_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISWAP_R); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - }); - - runTest("ISWAP_R (execute)", RANDOMX_FREQ_ISWAP_R > 0, [&] { - reg.r[registerDst] = 953360005391419562; - reg.r[registerSrc] = 4569451684712230561; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(reg.r[registerDst] == 4569451684712230561); - assert(reg.r[registerSrc] == 953360005391419562); - }); - - runTest("FSWAP_R (decode)", RANDOMX_FREQ_FSWAP_R > 0, 
[&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FSWAP_R - 1; - instr.dst = registerHigh | registerDst; - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FSWAP_R); - assert(ibc.fdst == ®.f[registerDst]); - }); - - runTest("FSWAP_R (execute)", RANDOMX_FREQ_FSWAP_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(953360005391419562, 4569451684712230561); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex((const char*)&vec, "aa886bb0df033b0da12e95e518f4693f")); - }); - - runTest("FADD_R (decode)", RANDOMX_FREQ_FADD_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FADD_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FADD_R); - assert(ibc.fdst == ®.f[registerDst]); - assert(ibc.fsrc == ®.a[registerSrc]); - }); - - runTest("FADD_R RoundToNearest (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); - reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); - rx_set_rounding_mode(RoundToNearest); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex(&vec, "b932e048a730cec1fea6ea633bcc2d40")); - }); - - runTest("FADD_R RoundDown (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); - reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); - rx_set_rounding_mode(RoundDown); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex(&vec, 
"b932e048a730cec1fda6ea633bcc2d40")); - }); - - runTest("FADD_R RoundUp (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); - reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); - rx_set_rounding_mode(RoundUp); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex(&vec, "b832e048a730cec1fea6ea633bcc2d40")); - }); - - runTest("FADD_R RoundToZero (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0x3ffd2c97cc4ef015, 0xc1ce30b3c4223576); - reg.a[registerSrc] = rx_set_vec_f128(0x402a26a86a60c8fb, 0x40b8f684057a59e1); - rx_set_rounding_mode(RoundToZero); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex(&vec, "b832e048a730cec1fda6ea633bcc2d40")); - }); - - runTest("FADD_M (decode)", RANDOMX_FREQ_FADD_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FADD_M - 1; - instr.mod = 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FADD_M); - assert(ibc.fdst == ®.f[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("FADD_M (execute)", RANDOMX_FREQ_FADD_R > 0, [&] { - uint64_t mockScratchpad; - store64(&mockScratchpad, 0x1234567890abcdef); - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0, 0); - reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; - rx_set_rounding_mode(RoundToNearest); - decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex(&vec, 
"000040840cd5dbc1000000785634b241")); - }); - - runTest("FSUB_R (decode)", RANDOMX_FREQ_FSUB_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FSUB_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FSUB_R); - assert(ibc.fdst == ®.f[registerDst]); - assert(ibc.fsrc == ®.a[registerSrc]); - }); - - runTest("FSUB_M (decode)", RANDOMX_FREQ_FSUB_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FSUB_M - 1; - instr.mod = 2; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FSUB_M); - assert(ibc.fdst == ®.f[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("FSCAL_R (decode)", RANDOMX_FREQ_FSCAL_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FSCAL_R - 1; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FSCAL_R); - assert(ibc.fdst == ®.f[registerDst]); - }); - - runTest("FSCAL_R (execute)", RANDOMX_FREQ_FSCAL_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.f[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.f[registerDst]); - assert(equalsHex((const char*)&vec, "073d17b6abfd0dc0838724ef5cc32bc1")); - }); - - runTest("FMUL_R (decode)", RANDOMX_FREQ_FMUL_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FMUL_R - 1; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - 
decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FMUL_R); - assert(ibc.fdst == ®.e[registerDst]); - assert(ibc.fsrc == ®.a[registerSrc]); - }); - - runTest("FMUL_R RoundToNearest (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); - reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); - rx_set_rounding_mode(RoundToNearest); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "69697aff350fd3422f1589cdecfed742")); - }); - - runTest("FMUL_R RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); - reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); - rx_set_rounding_mode(RoundDown); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "69697aff350fd3422e1589cdecfed742")); - }); - - runTest("FMUL_R RoundUp (execute)", RANDOMX_FREQ_FMUL_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41dbc35cef248783, 0x40fdfdabb6173d07); - reg.a[registerSrc] = rx_set_vec_f128(0x40eba861aa31c7c0, 0x41c4561212ae2d50); - rx_set_rounding_mode(RoundUp); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "6a697aff350fd3422f1589cdecfed742")); - }); - - runTest("FDIV_M (decode)", RANDOMX_FREQ_FDIV_M > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FDIV_M - 1; - instr.mod = 3; - instr.dst = registerHigh | registerDst; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == 
randomx::InstructionType::FDIV_M); - assert(ibc.fdst == ®.e[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("FDIV_M RoundToNearest (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { - alignas(16) uint64_t vec[2]; - alignas(16) uint32_t mockScratchpad[2]; - store32(&mockScratchpad[0], 0xd350a1b6); - store32(&mockScratchpad[1], 0x8b2460d9); - store64(&config.eMask[0], 0x3a0000000005d11a); - store64(&config.eMask[1], 0x39000000001ba31e); - reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 0x411b414296ce93b6); - reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; - rx_set_rounding_mode(RoundToNearest); - decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e7b269639484434632474a66635ba547")); - }); - - runTest("FDIV_M RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { - alignas(16) uint64_t vec[2]; - alignas(16) uint32_t mockScratchpad[2]; - store32(&mockScratchpad[0], 0xd350a1b6); - store32(&mockScratchpad[1], 0x8b2460d9); - store64(&config.eMask[0], 0x3a0000000005d11a); - store64(&config.eMask[1], 0x39000000001ba31e); - reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 0x411b414296ce93b6); - reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; - rx_set_rounding_mode(RoundDown); - decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e6b269639484434632474a66635ba547")); - }); - - runTest("FDIV_M RoundUp (execute)", RANDOMX_FREQ_FDIV_M > 0, [&] { - alignas(16) uint64_t vec[2]; - alignas(16) uint32_t mockScratchpad[2]; - store32(&mockScratchpad[0], 0xd350a1b6); - store32(&mockScratchpad[1], 0x8b2460d9); - store64(&config.eMask[0], 0x3a0000000005d11a); - store64(&config.eMask[1], 0x39000000001ba31e); - reg.e[registerDst] = rx_set_vec_f128(0x41937f76fede16ee, 
0x411b414296ce93b6); - reg.r[registerSrc] = 0xFFFFFFFFFFFFE930; - rx_set_rounding_mode(RoundUp); - decoder.executeInstruction(ibc, pc, (uint8_t*)&mockScratchpad, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e7b269639484434633474a66635ba547")); - }); - - runTest("FSQRT_R (decode)", RANDOMX_FREQ_FSQRT_R > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_FSQRT_R - 1; - instr.dst = registerHigh | registerDst; - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::FSQRT_R); - assert(ibc.fdst == ®.e[registerDst]); - }); - - runTest("FSQRT_R RoundToNearest (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); - rx_set_rounding_mode(RoundToNearest); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e81f300b612a21408dbaa33f570ed340")); - }); - - runTest("FSQRT_R RoundDown/RoundToZero (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); - rx_set_rounding_mode(RoundDown); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e81f300b612a21408cbaa33f570ed340")); - }); - - runTest("FSQRT_R RoundUp (execute)", RANDOMX_FREQ_FSQRT_R > 0, [&] { - alignas(16) uint64_t vec[2]; - reg.e[registerDst] = rx_set_vec_f128(0x41b6b21c11affea7, 0x40526a7e778d9824); - rx_set_rounding_mode(RoundUp); - decoder.executeInstruction(ibc, pc, nullptr, config); - rx_store_vec_f128((double*)&vec, reg.e[registerDst]); - assert(equalsHex(&vec, "e91f300b612a21408dbaa33f570ed340")); - }); - - runTest("CBRANCH (decode) 100", RANDOMX_FREQ_CBRANCH > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_CBRANCH - 1; - 
instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - instr.mod = 48; - decoder.compileInstruction(instr, 100, ibc); - assert(ibc.type == randomx::InstructionType::CBRANCH); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.imm == 0xFFFFFFFFC0CB9AD2); - assert(ibc.memMask == 0x7F800); - assert(ibc.target == pc); - }); - - runTest("CBRANCH (decode) 200", RANDOMX_FREQ_CBRANCH > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_CBRANCH - 1; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - instr.mod = 48; - decoder.compileInstruction(instr, pc = 200, ibc); - assert(ibc.type == randomx::InstructionType::CBRANCH); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.imm == 0xFFFFFFFFC0CB9AD2); - assert(ibc.memMask == 0x7F800); - assert(ibc.target == 100); - }); - - runTest("CBRANCH not taken (execute)", RANDOMX_FREQ_CBRANCH > 0, [&] { - reg.r[registerDst] = 0; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(pc == 200); - }); - - runTest("CBRANCH taken (execute)", RANDOMX_FREQ_CBRANCH > 0, [&] { - reg.r[registerDst] = 0xFFFFFFFFFFFC6800; - decoder.executeInstruction(ibc, pc, nullptr, config); - assert(pc == ibc.target); - }); - - runTest("CFROUND (decode)", RANDOMX_FREQ_CFROUND > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_CFROUND - 1; - instr.src = registerHigh | registerSrc; - instr.setImm32(imm32); - decoder.compileInstruction(instr, 100, ibc); - assert(ibc.type == randomx::InstructionType::CFROUND); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == 18); - }); - - runTest("ISTORE L1 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISTORE - 1; - instr.src = registerHigh | registerSrc; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - instr.mod = 1; - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISTORE); - assert(ibc.idst == ®.r[registerDst]); - 
assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL1Mask); - }); - - runTest("ISTORE L2 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISTORE - 1; - instr.src = registerHigh | registerSrc; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - instr.mod = 0; - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISTORE); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL2Mask); - }); - - runTest("ISTORE L3 (decode)", RANDOMX_FREQ_ISTORE > 0, [&] { - randomx::Instruction instr; - instr.opcode = randomx::ceil_ISTORE - 1; - instr.src = registerHigh | registerSrc; - instr.dst = registerHigh | registerDst; - instr.setImm32(imm32); - instr.mod = 224; - decoder.compileInstruction(instr, pc, ibc); - assert(ibc.type == randomx::InstructionType::ISTORE); - assert(ibc.idst == ®.r[registerDst]); - assert(ibc.isrc == ®.r[registerSrc]); - assert(ibc.imm == imm64); - assert(ibc.memMask == randomx::ScratchpadL3Mask); - }); - -#ifdef RANDOMX_FORCE_SECURE - vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr); -#else - vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr); -#endif - - auto test_a = [&] { - char hash[RANDOMX_HASH_SIZE]; - calcStringHash("test key 000", "This is a test", &hash); - assert(equalsHex(hash, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f")); - }; - - auto test_b = [&] { - char hash[RANDOMX_HASH_SIZE]; - calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash); - assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969")); - }; - - auto test_c = [&] { - char hash[RANDOMX_HASH_SIZE]; - calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); - 
assert(equalsHex(hash, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8")); - }; - - auto test_d = [&] { - char hash[RANDOMX_HASH_SIZE]; - calcStringHash("test key 001", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); - assert(equalsHex(hash, "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc")); - }; - - auto test_e = [&] { - char hash[RANDOMX_HASH_SIZE]; - calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash); - //std::cout << std::endl; - //outputHex(std::cout, (const char*)hash, sizeof(hash)); - //std::cout << std::endl; - assert(equalsHex(hash, "c56414121acda1713c2f2a819d8ae38aed7c80c35c2a769298d34f03833cd5f1")); - }; - - runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); - - runTest("Hash test 1b (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_b); - - runTest("Hash test 1c (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_c); - - runTest("Hash test 1d (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_d); - - runTest("Hash test 1e (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e); - - if (RANDOMX_HAVE_COMPILER) { - randomx_release_cache(cache); - randomx_destroy_vm(vm); - vm = nullptr; - cache = randomx_alloc_cache(RANDOMX_FLAG_JIT); - initCache("test key 000"); -#ifdef RANDOMX_FORCE_SECURE - vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr); -#else - vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr); -#endif - } - - runTest("Hash test 2a (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); - - runTest("Hash test 2b (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_b); - - runTest("Hash test 2c (compiler)", 
RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_c); - - runTest("Hash test 2d (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_d); - - runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e); - - auto flags = randomx_get_flags(); - - randomx_release_cache(cache); - cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_SSSE3); - - runTest("Cache initialization: SSSE3", (flags & RANDOMX_FLAG_ARGON2_SSSE3) && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - initCache("test key 000"); - uint64_t* cacheMemory = (uint64_t*)cache->memory; - assert(cacheMemory[0] == 0x191e0e1d23c02186); - assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1); - assert(cacheMemory[33554431] == 0x1f47f056d05cd99b); - }); - - if (cache != nullptr) - randomx_release_cache(cache); - cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_AVX2); - - runTest("Cache initialization: AVX2", (flags & RANDOMX_FLAG_ARGON2_AVX2) && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - initCache("test key 000"); - uint64_t* cacheMemory = (uint64_t*)cache->memory; - assert(cacheMemory[0] == 0x191e0e1d23c02186); - assert(cacheMemory[1568413] == 0xf1b62fe6210bf8b1); - assert(cacheMemory[33554431] == 0x1f47f056d05cd99b); - }); - - if (cache != nullptr) - randomx_release_cache(cache); - cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT); - - runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - char hash1[RANDOMX_HASH_SIZE]; - char hash2[RANDOMX_HASH_SIZE]; - char hash3[RANDOMX_HASH_SIZE]; - initCache("test key 000"); - char input1[] = "This is a test"; - char input2[] = "Lorem ipsum dolor sit amet"; - char input3[] = "sed do eiusmod tempor 
incididunt ut labore et dolore magna aliqua"; - - randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1); - randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1); - randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2); - randomx_calculate_hash_last(vm, &hash3); - - assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f")); - assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969")); - assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8")); - }); - - runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() { - rx_set_rounding_mode(RoundToNearest); - char hash[RANDOMX_HASH_SIZE]; - calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash); - assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969")); - assert(rx_get_rounding_mode() == RoundToNearest); - }); - - randomx_destroy_vm(vm); - vm = nullptr; - - if (cache != nullptr) - randomx_release_cache(cache); - - std::cout << std::endl << "All tests PASSED" << std::endl; - - if (skipped) { - std::cout << skipped << " tests were SKIPPED due to incompatible configuration (see above)" << std::endl; - } -} diff --git a/external/src/randomx/src/tests/utility.hpp b/external/src/randomx/src/tests/utility.hpp deleted file mode 100644 index 92723b9..0000000 --- a/external/src/randomx/src/tests/utility.hpp +++ /dev/null @@ -1,124 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include -#include - -constexpr char hexmap[] = "0123456789abcdef"; -inline void outputHex(std::ostream& os, const char* data, int length) { - for (int i = 0; i < length; ++i) { - os << hexmap[(data[i] & 0xF0) >> 4]; - os << hexmap[data[i] & 0x0F]; - } -} - -char parseNibble(char hex) { - hex &= ~0x20; - if (hex & 0x40) { - hex -= 'A' - 10; - } - else { - hex &= 0xf; - } - return hex; -} - -void hex2bin(const char *in, int length, char *out) { - for (int i = 0; i < length; i += 2) { - char nibble1 = parseNibble(*in++); - char nibble2 = parseNibble(*in++); - *out++ = nibble1 << 4 | nibble2; - } -} - -constexpr bool stringsEqual(char const * a, char const * b) { - return *a == *b && (*a == '\0' || stringsEqual(a + 1, b + 1)); -} - -template -bool equalsHex(const void* hash, const char (&hex)[N]) { - char reference[N / 2]; - hex2bin(hex, N - 1, reference); - return memcmp(hash, reference, sizeof(reference)) == 0; -} - -inline void dump(const char* buffer, uint64_t count, const char* name) { - std::ofstream fout(name, std::ios::out | std::ios::binary); - fout.write(buffer, count); - fout.close(); -} - -inline void readOption(const char* option, int argc, char** argv, bool& out) { - for (int i = 0; i < argc; ++i) { - if (strcmp(argv[i], option) == 0) { - out = true; - return; - } - } - out = false; -} - -inline void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) { - for (int i = 0; i < argc - 1; ++i) { - if (strcmp(argv[i], option) == 0 && (out = atoi(argv[i + 1])) > 0) { - return; - } - } - out = defaultValue; -} - -inline void readUInt64Option(const char* option, int argc, char** argv, uint64_t& out, uint64_t defaultValue) { - for (int i = 0; i < argc - 1; ++i) { - if (strcmp(argv[i], option) == 0 && (out = std::strtoull(argv[i + 1], NULL, 0)) > 0) { - return; - } - } - out = defaultValue; -} - -inline void readFloatOption(const char* option, int argc, char** argv, double& out, double 
defaultValue) { - for (int i = 0; i < argc - 1; ++i) { - if (strcmp(argv[i], option) == 0 && (out = atof(argv[i + 1])) > 0) { - return; - } - } - out = defaultValue; -} - -inline void readInt(int argc, char** argv, int& out, int defaultValue) { - for (int i = 0; i < argc; ++i) { - if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) { - return; - } - } - out = defaultValue; -} diff --git a/external/src/randomx/src/virtual_machine.cpp b/external/src/randomx/src/virtual_machine.cpp deleted file mode 100644 index 2d5d2be..0000000 --- a/external/src/randomx/src/virtual_machine.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include -#include -#include "virtual_machine.hpp" -#include "common.hpp" -#include "aes_hash.hpp" -#include "blake2/blake2.h" -#include "intrin_portable.h" -#include "allocator.hpp" - -randomx_vm::~randomx_vm() { - -} - -void randomx_vm::resetRoundingMode() { - rx_reset_float_state(); -} - -namespace randomx { - - static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { - auto exponent = entropy >> 59; //0..31 - auto mantissa = entropy & mantissaMask; - exponent += exponentBias; - exponent &= exponentMask; - exponent <<= mantissaSize; - return exponent | mantissa; - } - - static inline uint64_t getStaticExponent(uint64_t entropy) { - auto exponent = constExponentBits; - exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits; - exponent <<= mantissaSize; - return exponent; - } - - static inline uint64_t getFloatMask(uint64_t entropy) { - constexpr uint64_t mask22bit = (1ULL << 22) - 1; - return (entropy & mask22bit) | getStaticExponent(entropy); - } - -} - -void randomx_vm::initialize() { - store64(®.a[0].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(0))); - store64(®.a[0].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(1))); - store64(®.a[1].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2))); - store64(®.a[1].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(3))); - store64(®.a[2].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(4))); - store64(®.a[2].hi, 
randomx::getSmallPositiveFloatBits(program.getEntropy(5))); - store64(®.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6))); - store64(®.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7))); - mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask; - mem.mx = program.getEntropy(10); - auto addressRegisters = program.getEntropy(12); - config.readReg0 = 0 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg1 = 2 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg2 = 4 + (addressRegisters & 1); - addressRegisters >>= 1; - config.readReg3 = 6 + (addressRegisters & 1); - datasetOffset = (program.getEntropy(13) % (randomx::DatasetExtraItems + 1)) * randomx::CacheLineSize; - store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14))); - store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15))); -} - -namespace randomx { - - alignas(16) volatile static rx_vec_i128 aesDummy; - - template - VmBase::~VmBase() { - Allocator::freeMemory(scratchpad, ScratchpadSize); - } - - template - void VmBase::allocate() { - if (datasetPtr == nullptr) - throw std::invalid_argument("Cache/Dataset not set"); - if (!softAes) { //if hardware AES is not supported, it's better to fail now than to return a ticking bomb - rx_vec_i128 tmp = rx_load_vec_i128((const rx_vec_i128*)&aesDummy); - tmp = rx_aesenc_vec_i128(tmp, tmp); - rx_store_vec_i128((rx_vec_i128*)&aesDummy, tmp); - } - scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize); - } - - template - void VmBase::getFinalResult(void* out, size_t outSize) { - hashAes1Rx4(scratchpad, ScratchpadSize, ®.a); - blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0); - } - - template - void VmBase::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) { - hashAndFillAes1Rx4((void*) getScratchpad(), ScratchpadSize, ®.a, fill_state); - blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0); - } - - template - void 
VmBase::initScratchpad(void* seed) { - fillAes1Rx4(seed, ScratchpadSize, scratchpad); - } - - template - void VmBase::generateProgram(void* seed) { - fillAes4Rx4(seed, sizeof(program), &program); - } - - template class VmBase, false>; - template class VmBase, true>; - template class VmBase; - template class VmBase; -} \ No newline at end of file diff --git a/external/src/randomx/src/virtual_machine.hpp b/external/src/randomx/src/virtual_machine.hpp deleted file mode 100644 index c28eb21..0000000 --- a/external/src/randomx/src/virtual_machine.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#ifdef _MSC_VER -#pragma warning(disable : 4324) -#endif - -#include -#include "common.hpp" -#include "program.hpp" - -/* Global namespace for C binding */ -class randomx_vm { -public: - virtual ~randomx_vm() = 0; - virtual void allocate() = 0; - virtual void getFinalResult(void* out, size_t outSize) = 0; - virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0; - virtual void setDataset(randomx_dataset* /*dataset*/) { } - virtual void setCache(randomx_cache* /*cache*/) { } - virtual void initScratchpad(void* seed) = 0; - virtual void run(void* seed) = 0; - void resetRoundingMode(); - randomx::RegisterFile *getRegisterFile() { - return ® - } - const void* getScratchpad() { - return scratchpad; - } - const randomx::Program& getProgram() - { - return program; - } - const uint8_t* getMemory() const { - return mem.memory; - } -protected: - void initialize(); - alignas(64) randomx::Program program; - alignas(64) randomx::RegisterFile reg; - alignas(16) randomx::ProgramConfiguration config; - randomx::MemoryRegisters mem; - uint8_t* scratchpad = nullptr; - union { - randomx_cache* cachePtr = nullptr; - randomx_dataset* datasetPtr; - }; - uint64_t datasetOffset; -public: - std::string cacheKey; - alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data -}; - -namespace randomx { - - template - class VmBase : public randomx_vm { - public: - ~VmBase() override; - void allocate() override; - 
void initScratchpad(void* seed) override; - void getFinalResult(void* out, size_t outSize) override; - void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override; - protected: - void generateProgram(void* seed); - }; - -} diff --git a/external/src/randomx/src/virtual_memory.cpp b/external/src/randomx/src/virtual_memory.cpp deleted file mode 100644 index 262081a..0000000 --- a/external/src/randomx/src/virtual_memory.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "virtual_memory.hpp" - -#include - -#if defined(_WIN32) || defined(__CYGWIN__) -#include -#else -#ifdef __APPLE__ -#include -#include -# if defined(__aarch64__) && TARGET_OS_OSX -# define USE_PTHREAD_JIT_WP 1 -# include -# endif -#endif -#include -#include -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif -#define PAGE_READONLY PROT_READ -#define PAGE_READWRITE (PROT_READ | PROT_WRITE) -#define PAGE_EXECUTE_READ (PROT_READ | PROT_EXEC) -#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC) -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -std::string getErrorMessage(const char* function) { - LPSTR messageBuffer = nullptr; - size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); - std::string message(messageBuffer, size); - LocalFree(messageBuffer); - return std::string(function) + std::string(": ") + message; -} - -void setPrivilege(const char* pszPrivilege, BOOL bEnable) { - HANDLE hToken; - TOKEN_PRIVILEGES tp; - BOOL status; - DWORD error; - - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) - throw std::runtime_error(getErrorMessage("OpenProcessToken")); - - if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) - throw std::runtime_error(getErrorMessage("LookupPrivilegeValue")); - - tp.PrivilegeCount = 1; - - if (bEnable) - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - else - tp.Privileges[0].Attributes = 0; - - status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - - error = GetLastError(); - if (!status || (error != ERROR_SUCCESS)) - throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges")); - - if (!CloseHandle(hToken)) - throw std::runtime_error(getErrorMessage("CloseHandle")); -} -#endif - -void* allocMemoryPages(std::size_t bytes) { - 
void* mem; -#if defined(_WIN32) || defined(__CYGWIN__) - mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_READWRITE); - if (mem == nullptr) - throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc")); -#else - #if defined(__NetBSD__) - #define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC) - #else - #define RESERVED_FLAGS 0 - #endif - #ifdef __APPLE__ - #include - #ifdef TARGET_OS_OSX - #define MEXTRA MAP_JIT - #else - #define MEXTRA 0 - #endif - #else - #define MEXTRA 0 - #endif - #ifdef USE_PTHREAD_JIT_WP - #define PEXTRA PROT_EXEC - #else - #define PEXTRA 0 - #endif - mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0); - if (mem == MAP_FAILED) - throw std::runtime_error("allocMemoryPages - mmap failed"); -#ifdef USE_PTHREAD_JIT_WP - pthread_jit_write_protect_np(false); -#endif -#endif - return mem; -} - -static inline void pageProtect(void* ptr, std::size_t bytes, int rules) { -#if defined(_WIN32) || defined(__CYGWIN__) - DWORD oldp; - if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) { - throw std::runtime_error(getErrorMessage("VirtualProtect")); - } -#else - if (-1 == mprotect(ptr, bytes, rules)) - throw std::runtime_error("mprotect failed"); -#endif -} - -void setPagesRW(void* ptr, std::size_t bytes) { -#ifdef USE_PTHREAD_JIT_WP - pthread_jit_write_protect_np(false); -#else - pageProtect(ptr, bytes, PAGE_READWRITE); -#endif -} - -void setPagesRX(void* ptr, std::size_t bytes) { -#ifdef USE_PTHREAD_JIT_WP - pthread_jit_write_protect_np(true); -#else - pageProtect(ptr, bytes, PAGE_EXECUTE_READ); -#endif -} - -void setPagesRWX(void* ptr, std::size_t bytes) { - pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE); -} - -void* allocLargePagesMemory(std::size_t bytes) { - void* mem; -#if defined(_WIN32) || defined(__CYGWIN__) - setPrivilege("SeLockMemoryPrivilege", 1); - auto pageMinimum = GetLargePageMinimum(); - if (pageMinimum > 0) - mem = VirtualAlloc(NULL, alignSize(bytes, 
pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); - else - throw std::runtime_error("allocLargePagesMemory - Large pages are not supported"); - if (mem == nullptr) - throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc")); -#else -#ifdef __APPLE__ - mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); -#elif defined(__FreeBSD__) - mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0); -#elif defined(__OpenBSD__) || defined(__NetBSD__) - mem = MAP_FAILED; // OpenBSD does not support huge pages -#else - mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); -#endif - if (mem == MAP_FAILED) - throw std::runtime_error("allocLargePagesMemory - mmap failed"); -#endif - return mem; -} - -void freePagedMemory(void* ptr, std::size_t bytes) { -#if defined(_WIN32) || defined(__CYGWIN__) - VirtualFree(ptr, 0, MEM_RELEASE); -#else - munmap(ptr, bytes); -#endif -} diff --git a/external/src/randomx/src/virtual_memory.hpp b/external/src/randomx/src/virtual_memory.hpp deleted file mode 100644 index 9e8bc29..0000000 --- a/external/src/randomx/src/virtual_memory.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include - -constexpr std::size_t alignSize(std::size_t pos, std::size_t align) { - return ((pos - 1) / align + 1) * align; -} - -void* allocMemoryPages(std::size_t); -void setPagesRW(void*, std::size_t); -void setPagesRX(void*, std::size_t); -void setPagesRWX(void*, std::size_t); -void* allocLargePagesMemory(std::size_t); -void freePagedMemory(void*, std::size_t); diff --git a/external/src/randomx/src/vm_compiled.cpp b/external/src/randomx/src/vm_compiled.cpp deleted file mode 100644 index 060abec..0000000 --- a/external/src/randomx/src/vm_compiled.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "vm_compiled.hpp" -#include "common.hpp" - -namespace randomx { - - static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters"); - static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile"); - - template - CompiledVm::CompiledVm() { - if (!secureJit) { - compiler.enableAll(); //make JIT buffer both writable and executable - } - } - - template - void CompiledVm::setDataset(randomx_dataset* dataset) { - datasetPtr = dataset; - } - - template - void CompiledVm::run(void* seed) { - VmBase::generateProgram(seed); - randomx_vm::initialize(); - if (secureJit) { - compiler.enableWriting(); - } - compiler.generateProgram(program, config); - if (secureJit) { - compiler.enableExecution(); - } - mem.memory = datasetPtr->memory + datasetOffset; - execute(); - } - - template - void CompiledVm::execute() { -#ifdef __aarch64__ - memcpy(reg.f, config.eMask, sizeof(config.eMask)); -#endif - compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS); - } - - template class CompiledVm, false, false>; - template class CompiledVm, true, false>; - template class CompiledVm; - template class CompiledVm; - template class CompiledVm, false, true>; - template class CompiledVm, true, true>; - template class CompiledVm; - template class CompiledVm; -} \ No newline at end of file diff --git a/external/src/randomx/src/vm_compiled.hpp b/external/src/randomx/src/vm_compiled.hpp deleted file mode 100644 index f7ceb0a..0000000 --- a/external/src/randomx/src/vm_compiled.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#pragma once - -#include -#include -#include "virtual_machine.hpp" -#include "jit_compiler.hpp" -#include "allocator.hpp" -#include "dataset.hpp" - -namespace randomx { - - template - class CompiledVm : public VmBase { - public: - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(CompiledVm)); - } - CompiledVm(); - void setDataset(randomx_dataset* dataset) override; - void run(void* seed) override; - - using VmBase::mem; - using VmBase::program; - using VmBase::config; - using VmBase::reg; - using VmBase::scratchpad; - using VmBase::datasetPtr; - using VmBase::datasetOffset; - protected: - void execute(); - - JitCompiler compiler; - }; - - using CompiledVmDefault = CompiledVm, true, false>; - using CompiledVmHardAes = CompiledVm, false, false>; - using CompiledVmLargePage = CompiledVm; - using CompiledVmLargePageHardAes = CompiledVm; - using CompiledVmDefaultSecure = CompiledVm, true, true>; - using CompiledVmHardAesSecure = CompiledVm, false, true>; - using CompiledVmLargePageSecure = CompiledVm; - using CompiledVmLargePageHardAesSecure = CompiledVm; -} diff --git a/external/src/randomx/src/vm_compiled_light.cpp b/external/src/randomx/src/vm_compiled_light.cpp deleted file mode 100644 index 98dff34..0000000 --- a/external/src/randomx/src/vm_compiled_light.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "vm_compiled_light.hpp" -#include "common.hpp" -#include - -namespace randomx { - - template - void CompiledLightVm::setCache(randomx_cache* cache) { - cachePtr = cache; - mem.memory = cache->memory; - if (secureJit) { - compiler.enableWriting(); - } - compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); - if (secureJit) { - compiler.enableExecution(); - } - } - - template - void CompiledLightVm::run(void* seed) { - VmBase::generateProgram(seed); - randomx_vm::initialize(); - if (secureJit) { - compiler.enableWriting(); - } - compiler.generateProgramLight(program, config, datasetOffset); - if (secureJit) { - compiler.enableExecution(); - } - CompiledVm::execute(); - } - - template class CompiledLightVm, false, false>; - template class CompiledLightVm, true, false>; - template class CompiledLightVm; - template class CompiledLightVm; - template class CompiledLightVm, false, true>; - template class CompiledLightVm, true, true>; - template class CompiledLightVm; - template class CompiledLightVm; -} \ No newline at end of file diff --git a/external/src/randomx/src/vm_compiled_light.hpp b/external/src/randomx/src/vm_compiled_light.hpp deleted file mode 100644 index bed4ce1..0000000 --- a/external/src/randomx/src/vm_compiled_light.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include "vm_compiled.hpp" - -namespace randomx { - - template - class CompiledLightVm : public CompiledVm { - public: - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(CompiledLightVm)); - } - void setCache(randomx_cache* cache) override; - void setDataset(randomx_dataset* dataset) override { } - void run(void* seed) override; - - using CompiledVm::mem; - using CompiledVm::compiler; - using CompiledVm::program; - using CompiledVm::config; - using CompiledVm::cachePtr; - using CompiledVm::datasetOffset; - }; - - using CompiledLightVmDefault = CompiledLightVm, true, false>; - using CompiledLightVmHardAes = CompiledLightVm, false, false>; - using CompiledLightVmLargePage = CompiledLightVm; - using CompiledLightVmLargePageHardAes = CompiledLightVm; - using CompiledLightVmDefaultSecure = CompiledLightVm, true, 
true>; - using CompiledLightVmHardAesSecure = CompiledLightVm, false, true>; - using CompiledLightVmLargePageSecure = CompiledLightVm; - using CompiledLightVmLargePageHardAesSecure = CompiledLightVm; -} \ No newline at end of file diff --git a/external/src/randomx/src/vm_interpreted.cpp b/external/src/randomx/src/vm_interpreted.cpp deleted file mode 100644 index 64243c3..0000000 --- a/external/src/randomx/src/vm_interpreted.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include -#include -#include -#include -#include -#include -#include "vm_interpreted.hpp" -#include "dataset.hpp" -#include "intrin_portable.h" -#include "reciprocal.h" - -namespace randomx { - - template - void InterpretedVm::setDataset(randomx_dataset* dataset) { - datasetPtr = dataset; - mem.memory = dataset->memory; - } - - template - void InterpretedVm::run(void* seed) { - VmBase::generateProgram(seed); - randomx_vm::initialize(); - execute(); - } - - template - void InterpretedVm::execute() { - - NativeRegisterFile nreg; - - for(unsigned i = 0; i < RegisterCountFlt; ++i) - nreg.a[i] = rx_load_vec_f128(®.a[i].lo); - - compileProgram(program, bytecode, nreg); - - uint32_t spAddr0 = mem.mx; - uint32_t spAddr1 = mem.ma; - - for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { - uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1]; - spAddr0 ^= spMix; - spAddr0 &= ScratchpadL3Mask64; - spAddr1 ^= spMix >> 32; - spAddr1 &= ScratchpadL3Mask64; - - for (unsigned i = 0; i < RegistersCount; ++i) - nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - nreg.e[i] = maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i))); - - executeBytecode(bytecode, scratchpad, config); - - mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3]; - mem.mx &= CacheLineAlignMask; - datasetPrefetch(datasetOffset + mem.mx); - datasetRead(datasetOffset + mem.ma, nreg.r); - std::swap(mem.mx, mem.ma); - - for (unsigned i = 0; i < RegistersCount; ++i) - store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]); - 
- spAddr0 = 0; - spAddr1 = 0; - } - - for (unsigned i = 0; i < RegistersCount; ++i) - store64(®.r[i], nreg.r[i]); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128(®.f[i].lo, nreg.f[i]); - - for (unsigned i = 0; i < RegisterCountFlt; ++i) - rx_store_vec_f128(®.e[i].lo, nreg.e[i]); - } - - template - void InterpretedVm::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) { - uint64_t* datasetLine = (uint64_t*)(mem.memory + address); - for (int i = 0; i < RegistersCount; ++i) - r[i] ^= datasetLine[i]; - } - - template - void InterpretedVm::datasetPrefetch(uint64_t address) { - rx_prefetch_nta(mem.memory + address); - } - - template class InterpretedVm, false>; - template class InterpretedVm, true>; - template class InterpretedVm; - template class InterpretedVm; -} \ No newline at end of file diff --git a/external/src/randomx/src/vm_interpreted.hpp b/external/src/randomx/src/vm_interpreted.hpp deleted file mode 100644 index 2fac2ed..0000000 --- a/external/src/randomx/src/vm_interpreted.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include -#include "common.hpp" -#include "virtual_machine.hpp" -#include "bytecode_machine.hpp" -#include "intrin_portable.h" -#include "allocator.hpp" - -namespace randomx { - - template - class InterpretedVm : public VmBase, public BytecodeMachine { - public: - using VmBase::mem; - using VmBase::scratchpad; - using VmBase::program; - using VmBase::config; - using VmBase::reg; - using VmBase::datasetPtr; - using VmBase::datasetOffset; - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(InterpretedVm)); - } - void run(void* seed) override; - void setDataset(randomx_dataset* dataset) override; - protected: - virtual void datasetRead(uint64_t blockNumber, int_reg_t(&r)[RegistersCount]); - virtual void datasetPrefetch(uint64_t blockNumber); - private: - void execute(); - - InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE]; - }; - - using InterpretedVmDefault = InterpretedVm, true>; - using InterpretedVmHardAes = InterpretedVm, false>; - using InterpretedVmLargePage = InterpretedVm; - using 
InterpretedVmLargePageHardAes = InterpretedVm; -} \ No newline at end of file diff --git a/external/src/randomx/src/vm_interpreted_light.cpp b/external/src/randomx/src/vm_interpreted_light.cpp deleted file mode 100644 index c54b32f..0000000 --- a/external/src/randomx/src/vm_interpreted_light.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "vm_interpreted_light.hpp" -#include "dataset.hpp" - -namespace randomx { - - template - void InterpretedLightVm::setCache(randomx_cache* cache) { - cachePtr = cache; - mem.memory = cache->memory; - } - - template - void InterpretedLightVm::datasetRead(uint64_t address, int_reg_t(&r)[8]) { - uint32_t itemNumber = address / CacheLineSize; - int_reg_t rl[8]; - - initDatasetItem(cachePtr, (uint8_t*)rl, itemNumber); - - for (unsigned q = 0; q < 8; ++q) - r[q] ^= rl[q]; - } - - template class InterpretedLightVm, false>; - template class InterpretedLightVm, true>; - template class InterpretedLightVm; - template class InterpretedLightVm; -} diff --git a/external/src/randomx/src/vm_interpreted_light.hpp b/external/src/randomx/src/vm_interpreted_light.hpp deleted file mode 100644 index 02d678f..0000000 --- a/external/src/randomx/src/vm_interpreted_light.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* -Copyright (c) 2018-2019, tevador - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include "vm_interpreted.hpp" - -namespace randomx { - - template - class InterpretedLightVm : public InterpretedVm { - public: - using VmBase::mem; - using VmBase::cachePtr; - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(InterpretedLightVm)); - } - void setDataset(randomx_dataset* dataset) override { } - void setCache(randomx_cache* cache) override; - protected: - void datasetRead(uint64_t address, int_reg_t(&r)[8]) override; - void datasetPrefetch(uint64_t address) override { } - }; - - using InterpretedLightVmDefault = InterpretedLightVm, true>; - using InterpretedLightVmHardAes = InterpretedLightVm, false>; - using InterpretedLightVmLargePage = InterpretedLightVm; - using InterpretedLightVmLargePageHardAes = InterpretedLightVm; -}