diff --git a/BUILD.md b/BUILD.md index 9012656a..1f501fa7 100644 --- a/BUILD.md +++ b/BUILD.md @@ -35,6 +35,7 @@ - GCC 11.4 - Clang 9.0 - Clang 12.0 +- Clang 16.0 - GNU binutils 2.32 ### Windows* OS - [Common tools](#common-tools) @@ -212,10 +213,10 @@ To build the Intel IPP Cryptography library on macOS\*, complete the following s - `-DPLATFORM_LIST=""` - optional, works only if `-DMERGED_BLD:BOOL=off` is set. Sets target platforms for the code to be compiled. See the supported platforms list [here](./OVERVIEW.md). - Example for Linux\* OS and the IA-32 architecture: - `-DPLATFORM_LIST="m7;s8;p8;g9;h9"` + `-DPLATFORM_LIST="w7;s8;p8;g9;h9"` - Example for Linux\* OS and the Intel® 64 architecture: - `-DPLATFORM_LIST="w7;n8;y8;e9;l9;k0"` + `-DPLATFORM_LIST="m7;n8;y8;e9;l9;k0;k1"` - `-DNO_CRYPTO_MB:BOOL=TRUE` - optional, turns off the build of [Crypto Multi Buffer library](./sources/ippcp/crypto_mb/Readme.md) and, as a consequence, removes all dependencies on OpenSSL library. - `-DBABASSL:BOOL=on`, `-DBORINGSSL:BOOL=on` - required only if forks of OpenSSL library are used to resolve OpenSSL dependencies - Tongsuo and BoringSSL respectively. These flags make sense when [Crypto Multi Buffer library](./sources/ippcp/crypto_mb/Readme.md) is built. - `-DIPPCP_CUSTOM_BUILD=""` - optional, works only if `-DMERGED_BLD:BOOL=off` is set, i.e. only for 1CPU libraries. Enables the CPU feature dispatching mask at compile-time based on the provided list. diff --git a/CHANGELOG.md b/CHANGELOG.md index f23fda35..d60c9cad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ This is a list of notable changes to Intel(R) IPP Cryptography, in reverse chronological order. +## Intel(R) IPP Cryptography 2021.12 +- Added single-buffer implementation of Leighton-Micali Hash-Based Signatures(LMS) algorithm, verification part. +- Added support of Clang 16.0 compiler for Linux. +- Added examples of AES-GCM Encryption/Decryption usage. 
+- AES-GCM algorithm with Intel® Advanced Vector Extensions 2 (Intel® AVX2) vector extensions of Intel® AES New Instructions (Intel® AES-NI) was optimized. + ## Intel(R) IPP Cryptography 2021.11 - Minimal supported BoringSSL version was increased to [45cf810d](https://github.com/google/boringssl/archive/45cf810dbdbd767f09f8cb0b0fcccd342c39041f.tar.gz) tag. diff --git a/LICENSE b/LICENSE index c7047e16..ec78a4e5 100644 --- a/LICENSE +++ b/LICENSE @@ -173,62 +173,4 @@ incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - CMake - ------------------------------ - CMake - Cross Platform Makefile Generator - Copyright 2000-2021 Kitware, Inc. and Contributors - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Kitware, Inc. nor the names of Contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file +END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md index 1ac624a6..bda55102 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ The library provides a comprehensive set of routines commonly used for cryptogra - Finite Field Arithmetic Functions - Big Number Integer Arithmetic Functions - PRNG/TRNG and Prime Numbers Generation +- Hash-based signature algorithms ## Reasons to Use Intel IPP Cryptography - Security (constant-time execution for secret processing functions) diff --git a/README_FIPS.md b/README_FIPS.md index 29af90c3..ca16a29b 100644 --- a/README_FIPS.md +++ b/README_FIPS.md @@ -25,7 +25,7 @@ In general, software may be certified at up to level 2. Intel® Integrated Performance Primitives Cryptography (Intel(R) IPP Cryptography) provides building blocks of FIPS-mode API (such as self-tests, FIPS-approved -functionality status query) which can help the end users to fullfill FIPS level 1 requirements. +functionality status query) which can help the end users to fulfill FIPS level 1 requirements. Please, refer to [Covered Algorithms](#covered-algorithms) section for the full list of FIPS-Approved API which are covered with the selftests. @@ -41,7 +41,7 @@ Intel(R) IPP Cryptography may be built in FIPS-mode with IPPCP_FIPS_MODE=on configuration for ippcp and MBX_FIPS_MODE=on for crypto_MB (see details in [Build section](#build)). Application, which uses Intel(R) IPP Cryptography may be **FIPS-Certified** by -matching FIPS 140 requirement and obtaining NIST sertificate or also be **FIPS-Compliant** for their own customers. +matching FIPS 140 requirement and obtaining NIST certificate or also be **FIPS-Compliant** for their own customers. Please, refer to [Level 1 Specific Requirements](#level-1-specific-requirements) for the detailed description of what is done on Intel(R) IPP Cryptography-side @@ -59,7 +59,7 @@ and what should be done by a more high-level application. 
| 6 | Run pairwise consistency selftest for newly generated RSA/ECC keypair | Intel(R) IPP Cryptography provides [fips_selftest_ippcp API](#covered-algorithms) to run selftests | | 7 | Module to guarantee uniqueness of GSM key + IV | **User's application effort required** | | 8 | Module to guarantee XTS key1 != key2 | Intel(R) IPP Cryptography-side check | -| 9 | (non-production) Extract raw noise source output samples of RBG for quality analysis | DBRNG is currenty out of the cryptography boundary | +| 9 | (non-production) Extract raw noise source output samples of RBG for quality analysis | DBRNG is currently out of the cryptography boundary | | 10| (non-production) Run crypto algorithm testing with NIST-generated vectors | Done offline by Intel(R) IPP Cryptography for the [covered algorithms](#covered-algorithms) | For the implementation details about the steps in [Level 1 Specific Requirements](#level-1-specific-requirements) @@ -107,7 +107,7 @@ Configuration example for ippcp with Intel® C++ Compiler: `CC=icc CXX=icpc cmake CMakeLists.txt -B_build -DARCH=intel64 -DIPPCP_FIPS_MODE:BOOL=on[-DIPPCP_SELFTEST_USE_MALLOC:BOOL=on]` -> Note: selftests with intenal memory allocation uses malloc, which introduces +> Note: selftests with internal memory allocation uses malloc, which introduces a c runtime dependency. To avoid the dependency, use IPPCP_SELFTEST_USE_MALLOC:BOOL=off or do not specify it as this as the default. In this case, all self-tests will require external memory allocation. @@ -186,7 +186,7 @@ mbx_nistp256_ecdh_mb8(sharedBA, prvB, pubAx, pubAy, pubAz_curr, 0); #### Intel(R) IPP Cryptography Each API from the list is covered with the selftest fips_selftest_ipps -availible in Intel(R) IPP Cryptography build in FIPS mode. +available in Intel(R) IPP Cryptography build in FIPS mode. 
##### AES diff --git a/data/images/README_FIPS-pictures-0-ippcp_architecture.png b/data/images/README_FIPS-pictures-0-ippcp_architecture.png index ef9381a9..b4c5f563 100644 Binary files a/data/images/README_FIPS-pictures-0-ippcp_architecture.png and b/data/images/README_FIPS-pictures-0-ippcp_architecture.png differ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3de76439..adeccbc3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,9 +20,12 @@ # List of examples for targets generation set(IPPCP_EXAMPLES - # AES examples + # AES-CTR examples aes/aes-256-ctr-encryption.cpp aes/aes-256-ctr-decryption.cpp + # AES-GCM examples + aes/aes-128-gcm-encryption.cpp + aes/aes-128-gcm-decryption.cpp # DSA dsa/dsa-dlp-sha-1-verification.cpp dsa/dsa-dlp-sha-256-verification.cpp diff --git a/examples/aes/aes-128-gcm-decryption.cpp b/examples/aes/aes-128-gcm-decryption.cpp new file mode 100644 index 00000000..d57afec4 --- /dev/null +++ b/examples/aes/aes-128-gcm-decryption.cpp @@ -0,0 +1,172 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/*! + * + * \file + * + * \brief AES Galois Counter mode of operation (GCM) example + * + * This example demonstrates usage of AES block cipher with 128-bit key + * run with GCM mode of operation. 
Decryption scheme. + * + * The GCM mode of operation is implemented according to the + * "NIST Special Publication 800-38D: Recommendation for Block Cipher Modes of + * Operation: Galois/Counter Mode (GCM) and GMAC" document: + * + * https://csrc.nist.gov/pubs/sp/800/38/d/final + * + */ + +#include + +#include "ippcp.h" +#include "examples_common.h" + +/*! Key size in bytes */ +static const int KEY_SIZE = 16; + +/*! Message size in bytes */ +static const int MSG_LEN = 60; + +/*! Initialization vector size in bytes */ +static const int IV_LEN = 12; + +/*! Tag size in bytes */ +static const int TAG_LEN = 16; + +/*! Additional authenticated data size in bytes */ +static const int AAD_LEN = 20; + +/*! 128-bit secret key */ +static Ipp8u key128[KEY_SIZE] = { + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c, + 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08 +}; + +/*! Initialization vector */ +static const Ipp8u iv[IV_LEN] = { + 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad, + 0xde,0xca,0xf8,0x88 +}; + +/*! Plain text */ +static Ipp8u plainText[MSG_LEN] = { + 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5, + 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda, + 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53, + 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57, + 0xba,0x63,0x7b,0x39 +}; + +/*! Cipher text */ +static Ipp8u cipherText[MSG_LEN] = { + 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24, + 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0, + 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c, + 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97, + 0x3d,0x58,0xe0,0x91 +}; + +/*! Tag */ +static const Ipp8u tag[TAG_LEN] = { + 0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb, + 0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47 +}; + +/*! 
Additional authenticated data */ +static const Ipp8u aad[AAD_LEN] = { + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2 +}; + +/*! Main function */ +int main(void) +{ + /* Size of AES-GCM context structure. It will be set up in ippsAES_GCMGetSize(). */ + int AESGCMSize = 0; + + /* Output plain text */ + Ipp8u pOutPlainText[MSG_LEN] = {}; + /* Output tag */ + Ipp8u pOutTag[TAG_LEN] = {}; + + /* Pointer to AES-GCM context structure */ + IppsAES_GCMState* pAESGCMState = 0; + + /* Internal function status */ + IppStatus status = ippStsNoErr; + + do { + /* 1. Get size needed for AES-GCM context structure */ + status = ippsAES_GCMGetSize(&AESGCMSize); + if (!checkStatus("ippsAES_GCMGetSize", ippStsNoErr, status)) + return status; + + /* 2. Allocate memory for AES-GCM context structure */ + pAESGCMState = (IppsAES_GCMState*)(new Ipp8u[AESGCMSize]); + if (NULL == pAESGCMState) { + printf("ERROR: Cannot allocate memory (%d bytes) for AES-GCM state\n", AESGCMSize); + return -1; + } + + /* 3. Initialize AES-GCM context */ + status = ippsAES_GCMInit(key128, KEY_SIZE, pAESGCMState, AESGCMSize); + if (!checkStatus("ippsAES_GCMInit", ippStsNoErr, status)) + break; + + /* 4. Decryption setup */ + status = ippsAES_GCMStart(iv, IV_LEN, aad, AAD_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMStart", ippStsNoErr, status)) + break; + + /* 5.Decryption */ + status = ippsAES_GCMDecrypt(cipherText, pOutPlainText, MSG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMDecrypt", ippStsNoErr, status)) + break; + + /* 6. 
Get tag */ + status = ippsAES_GCMGetTag(pOutTag, TAG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMGetTag", ippStsNoErr, status)) + break; + + /* Compare output to known answer */ + if (0 != memcmp(pOutTag, tag, TAG_LEN)) { + printf("ERROR: Output tag and reference tag do not match\n"); + break; + } + if (0 != memcmp(pOutPlainText, plainText, MSG_LEN)) { + printf("ERROR: Decrypted and plain text do not match\n"); + break; + } + } while (0); + + /* 7. Remove secret and release resources */ + ippsAES_GCMReset(pAESGCMState); + if (pAESGCMState) + delete [] (Ipp8u*)pAESGCMState; + + PRINT_EXAMPLE_STATUS("ippsAES_GCMDecrypt", "AES-GCM 128 Decryption", !status) + + return status; +} diff --git a/examples/aes/aes-128-gcm-encryption.cpp b/examples/aes/aes-128-gcm-encryption.cpp new file mode 100644 index 00000000..102a50f6 --- /dev/null +++ b/examples/aes/aes-128-gcm-encryption.cpp @@ -0,0 +1,172 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/*! + * + * \file + * + * \brief AES Galois Counter mode of operation (GCM) example + * + * This example demonstrates usage of AES block cipher with 128-bit key + * run with GCM mode of operation. Encryption scheme. 
+ * + * The GCM mode of operation is implemented according to the + * "NIST Special Publication 800-38D: Recommendation for Block Cipher Modes of + * Operation: Galois/Counter Mode (GCM) and GMAC" document: + * + * https://csrc.nist.gov/pubs/sp/800/38/d/final + * + */ + +#include + +#include "ippcp.h" +#include "examples_common.h" + +/*! Key size in bytes */ +static const int KEY_SIZE = 16; + +/*! Message size in bytes */ +static const int MSG_LEN = 60; + +/*! Initialization vector size in bytes */ +static const int IV_LEN = 12; + +/*! Tag size in bytes */ +static const int TAG_LEN = 16; + +/*! Additional authenticated data size in bytes */ +static const int AAD_LEN = 20; + +/*! 128-bit secret key */ +static Ipp8u key128[KEY_SIZE] = { + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c, + 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08 +}; + +/*! Initialization vector */ +static const Ipp8u iv[IV_LEN] = { + 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad, + 0xde,0xca,0xf8,0x88 +}; + +/*! Plain text */ +static Ipp8u plainText[MSG_LEN] = { + 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5, + 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda, + 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53, + 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57, + 0xba,0x63,0x7b,0x39 +}; + +/*! Cipher text */ +static Ipp8u cipherText[MSG_LEN] = { + 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24, + 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0, + 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c, + 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97, + 0x3d,0x58,0xe0,0x91 +}; + +/*! Tag */ +static const Ipp8u tag[TAG_LEN] = { + 0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb, + 0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47 +}; + +/*! 
Additional authenticated data */ +static const Ipp8u aad[AAD_LEN] = { + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2 +}; + +/*! Main function */ +int main(void) +{ + /* Size of AES-GCM context structure. It will be set up in ippsAES_GCMGetSize(). */ + int AESGCMSize = 0; + + /* Output cipher text */ + Ipp8u pOutCipherText[MSG_LEN] = {}; + /* Output tag */ + Ipp8u pOutTag[TAG_LEN] = {}; + + /* Pointer to AES-GCM context structure */ + IppsAES_GCMState* pAESGCMState = 0; + + /* Internal function status */ + IppStatus status = ippStsNoErr; + + do { + /* 1. Get size needed for AES-GCM context structure */ + status = ippsAES_GCMGetSize(&AESGCMSize); + if (!checkStatus("ippsAES_GCMGetSize", ippStsNoErr, status)) + return status; + + /* 2. Allocate memory for AES-GCM context structure */ + pAESGCMState = (IppsAES_GCMState*)(new Ipp8u[AESGCMSize]); + if (NULL == pAESGCMState) { + printf("ERROR: Cannot allocate memory (%d bytes) for AES-GCM state\n", AESGCMSize); + return -1; + } + + /* 3. Initialize AES-GCM context */ + status = ippsAES_GCMInit(key128, KEY_SIZE, pAESGCMState, AESGCMSize); + if (!checkStatus("ippsAES_GCMInit", ippStsNoErr, status)) + break; + + /* 4. Encryption setup */ + status = ippsAES_GCMStart(iv, IV_LEN, aad, AAD_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMStart", ippStsNoErr, status)) + break; + + /* 5. Encryption */ + status = ippsAES_GCMEncrypt(plainText, pOutCipherText, MSG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMEncrypt", ippStsNoErr, status)) + break; + + /* 6. 
Get tag */ + status = ippsAES_GCMGetTag(pOutTag, TAG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMGetTag", ippStsNoErr, status)) + break; + + /* Compare output to known answer */ + if (0 != memcmp(pOutTag, tag, TAG_LEN)) { + printf("ERROR: Output tag and reference tag do not match\n"); + break; + } + if (0 != memcmp(pOutCipherText, cipherText, MSG_LEN)) { + printf("ERROR: Encrypted and reference messages do not match\n"); + break; + } + } while (0); + + /* 7. Remove secret and release resources */ + ippsAES_GCMReset(pAESGCMState); + if (pAESGCMState) + delete [] (Ipp8u*)pAESGCMState; + + PRINT_EXAMPLE_STATUS("ippsAES_GCMEncrypt", "AES-GCM 128 Encryption", !status) + + return status; +} diff --git a/examples/examplesBuildOptions.cmake b/examples/examplesBuildOptions.cmake index ac8dfa96..e3d830af 100644 --- a/examples/examplesBuildOptions.cmake +++ b/examples/examplesBuildOptions.cmake @@ -60,9 +60,12 @@ if(UNIX) set(LINK_FLAG_S_ST_LINUX "-Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now") if(NOT NONPIC_LIB) ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-fpie") + ippcp_extend_variable(CMAKE_CXX_FLAGS "-fpie -fPIE") + else() + ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-no-pie") endif() - ippcp_extend_variable(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security -fpie -fPIE") + ippcp_extend_variable(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security") if(${ARCH} MATCHES "ia32") ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-m32") diff --git a/examples/utils/bignum.h b/examples/utils/bignum.h index 9c070afc..2ef32680 100644 --- a/examples/utils/bignum.h +++ b/examples/utils/bignum.h @@ -42,7 +42,7 @@ class BigNumber friend IppsBigNumState* BN(const BigNumber& bn) {return bn.m_pBN;} operator IppsBigNumState* () const { return m_pBN; } - // some useful constatns + // some useful constants static const BigNumber& Zero(); static const BigNumber& One(); static const BigNumber& Two(); diff --git a/include/ippcp.h b/include/ippcp.h index 
b3a0e670..f5cb74eb 100644 --- a/include/ippcp.h +++ b/include/ippcp.h @@ -1555,6 +1555,23 @@ IPPAPI(IppStatus, ippsXMSSVerify, (const Ipp8u* pMsg, #endif // IPPCP_PREVIEW_XMSS +#ifdef IPPCP_PREVIEW_LMS + +IPPAPI(IppStatus, ippsLMSBufferGetSize, (Ipp32s* pSize, Ipp32s maxMessageLength, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSSignatureStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSPublicKeyStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSSetPublicKeyState, (const IppsLMSAlgoType lmsType, const Ipp8u* pI, const Ipp8u* pK, + IppsLMSPublicKeyState* pState)) +IPPAPI(IppStatus, ippsLMSSetSignatureState, (const IppsLMSAlgoType lmsType, Ipp32u q, const Ipp8u* pC, + const Ipp8u* pY, const Ipp8u* pAuthPath, + IppsLMSSignatureState* pState)) +IPPAPI(IppStatus, ippsLMSVerify, (const Ipp8u* pMsg, const Ipp32s msgLen, + const IppsLMSSignatureState* pSign, + int* pIsSignValid, + const IppsLMSPublicKeyState* pKey, + Ipp8u* pBuffer)) +#endif // IPPCP_PREVIEW_LMS + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER) #pragma warning(pop) #endif diff --git a/include/ippcp/fips_cert.h b/include/ippcp/fips_cert.h index 56d2bc87..4feef86c 100644 --- a/include/ippcp/fips_cert.h +++ b/include/ippcp/fips_cert.h @@ -222,7 +222,8 @@ enum FIPS_IPPCP_FUNC { GFpECESEncrypt_SM2, GFpECESDecrypt_SM2, GFpECESFinal_SM2, - XMSSVerify + XMSSVerify, + LMSVerify }; /** diff --git a/include/ippcpdefs.h b/include/ippcpdefs.h index f57f4a2a..a1c41558 100644 --- a/include/ippcpdefs.h +++ b/include/ippcpdefs.h @@ -853,28 +853,84 @@ IPPAPI( const char*, ippcpGetStatusString, ( IppStatus StsCode )) IPPAPI( int, ippcpGetEnabledNumThreads, ( void ) ) IPPAPI( Ipp64u, ippcpGetCpuClocks, (void) ) -#ifdef IPPCP_PREVIEW_XMSS +/* Defines related to experimental features enabling */ +#ifdef IPPCP_PREVIEW_ALL + #ifndef IPPCP_PREVIEW_XMSS + #define IPPCP_PREVIEW_XMSS (1) + #endif + #ifndef 
IPPCP_PREVIEW_LMS + #define IPPCP_PREVIEW_LMS (1) + #endif +#endif + /* // ========================================================= // XMSS Algo // ========================================================= */ +#ifdef IPPCP_PREVIEW_XMSS + typedef enum + { + reserved = 0, + XMSS_SHA2_10_256 = 1, + XMSS_SHA2_16_256 = 2, + XMSS_SHA2_20_256 = 3, + XMSS_SHA2_10_512 = 4, + XMSS_SHA2_16_512 = 5, + XMSS_SHA2_20_512 = 6 + } IppsXMSSAlgo; + + typedef struct _cpXMSSSignatureState IppsXMSSSignatureState; + typedef struct _cpXMSSPublicKeyState IppsXMSSPublicKeyState; -typedef enum -{ - reserved = 0, - XMSS_SHA2_10_256 = 1, - XMSS_SHA2_16_256 = 2, - XMSS_SHA2_20_256 = 3, - XMSS_SHA2_10_512 = 4, - XMSS_SHA2_16_512 = 5, - XMSS_SHA2_20_512 = 6 -} IppsXMSSAlgo; +#endif // IPPCP_PREVIEW_XMSS -typedef struct _cpXMSSSignatureState IppsXMSSSignatureState; -typedef struct _cpXMSSPublicKeyState IppsXMSSPublicKeyState; -#endif // IPPCP_PREVIEW_XMSS +/* +// ========================================================= +// LMS Algo +// ========================================================= +*/ +#ifdef IPPCP_PREVIEW_LMS + /* Parameters set is based on two articles: + * RFC8554 (https://datatracker.ietf.org/doc/html/rfc8554) + * https://datatracker.ietf.org/doc/html/draft-fluhrer-lms-more-parm-sets-00 + */ + typedef enum + { + LMOTS_SHA256_N32_W1 = 1, + LMOTS_SHA256_N32_W2 = 2, + LMOTS_SHA256_N32_W4 = 3, + LMOTS_SHA256_N32_W8 = 4, + LMOTS_SHA256_N24_W1 = 5, + LMOTS_SHA256_N24_W2 = 6, + LMOTS_SHA256_N24_W4 = 7, + LMOTS_SHA256_N24_W8 = 8 + } IppsLMOTSAlgo; + + typedef enum + { + LMS_SHA256_M32_H5 = 5, + LMS_SHA256_M32_H10 = 6, + LMS_SHA256_M32_H15 = 7, + LMS_SHA256_M32_H20 = 8, + LMS_SHA256_M32_H25 = 9, + LMS_SHA256_M24_H5 = 10, + LMS_SHA256_M24_H10 = 11, + LMS_SHA256_M24_H15 = 12, + LMS_SHA256_M24_H20 = 13, + LMS_SHA256_M24_H25 = 14 + } IppsLMSAlgo; + + typedef struct { + IppsLMOTSAlgo lmotsOIDAlgo; + IppsLMSAlgo lmsOIDAlgo; + } IppsLMSAlgoType; + + typedef struct _cpLMSSignatureState 
IppsLMSSignatureState; + typedef struct _cpLMSPublicKeyState IppsLMSPublicKeyState; +#endif // IPPCP_PREVIEW_LMS + #ifdef __cplusplus } diff --git a/include/ippversion.h b/include/ippversion.h index 17851429..54bf8f9f 100644 --- a/include/ippversion.h +++ b/include/ippversion.h @@ -26,14 +26,14 @@ #if !defined( IPPVERSION_H__ ) #define IPPVERSION_H__ -#define IPP_VERSION_MAJOR 2021 -#define IPP_VERSION_MINOR 12 +#define IPP_VERSION_MAJOR 1 +#define IPP_VERSION_MINOR 0 #define IPP_VERSION_UPDATE 0 // Major interface version -#define IPP_INTERFACE_VERSION_MAJOR 11 +#define IPP_INTERFACE_VERSION_MAJOR 12 // Minor interface version -#define IPP_INTERFACE_VERSION_MINOR 14 +#define IPP_INTERFACE_VERSION_MINOR 0 #define IPP_VERSION_STR STR(IPP_VERSION_MAJOR) "." STR(IPP_VERSION_MINOR) "." STR(IPP_VERSION_UPDATE) " (" STR(IPP_INTERFACE_VERSION_MAJOR) "." STR(IPP_INTERFACE_VERSION_MINOR) " )" diff --git a/sources/cmake/windows/IntelLLVM2023.1.0.cmake b/sources/cmake/windows/IntelLLVM2023.1.0.cmake new file mode 100644 index 00000000..dd570edd --- /dev/null +++ b/sources/cmake/windows/IntelLLVM2023.1.0.cmake @@ -0,0 +1,107 @@ +#=============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== + +# +# Intel® Integrated Performance Primitives Cryptography (Intel® IPP Cryptography) +# + +# linker +set(LINK_FLAG_STATIC_WINDOWS "/ignore:4221") # ignore warnings about empty obj files +# Suppresses the display of the copyright banner when the compiler starts up and display of informational messages during compiling. +set(LINK_FLAG_DYNAMIC_WINDOWS "/nologo") +# Displays information about modules that are incompatible with safe structured exception handling when /SAFESEH isn't specified. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /VERBOSE:SAFESEH") +# Disable incremental linking +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /INCREMENTAL:NO") +# The /NODEFAULTLIB option tells the linker to remove one or more default libraries from the list of libraries it searches when resolving external references. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /NODEFAULTLIB") +# Indicates that an executable was tested to be compatible with the Windows Data Execution Prevention feature. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /NXCOMPAT") +# Specifies whether to generate an executable image that can be randomly rebased at load time. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /DYNAMICBASE") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /CETCOMPAT") + +if(${ARCH} MATCHES "ia32") + # When /SAFESEH is specified, the linker will only produce an image if it can also produce a table of the image's safe exception handlers. + set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /SAFESEH") +else() + # The /LARGEADDRESSAWARE option tells the linker that the application can handle addresses larger than 2 gigabytes. 
+ set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /LARGEADDRESSAWARE") + # This option modifies the header of an executable image, a .dll file or .exe file, to indicate whether ASLR with 64-bit addresses is supported. + set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /HIGHENTROPYVA") +endif(${ARCH} MATCHES "ia32") + +# Disables linking to Intel® libraries +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /Qno-intel-lib") + +# Link to universal C runtime and MSVC runtime. Used in dlls. +set(LINK_LIB_STATIC_RELEASE libcmt libucrt libvcruntime) +set(LINK_LIB_STATIC_DEBUG libcmtd libucrtd libvcruntime) + +# compiler +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${LIBRARY_DEFINES}") + +# Suppresses the display of the copyright banner when the compiler starts up and display of informational messages during compiling. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /nologo") +# Warning level = 4 +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") +# Changes all warnings to errors. +#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX") +# Detects some buffer overruns that overwrite a function's return address, exception handler address, or certain types of parameters. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GS") +# Controls how the members of a structure are packed into memory and specifies the same packing for all structures in a module. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zp16") +# Allows the compiler to package individual functions in the form of packaged functions. Smaller resulting size. 
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Gy") +# C std +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcf-protection:full") +# Suppress some warnings +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Wno-missing-braces /Wno-null-pointer-arithmetic /Wno-unused-function /Wno-static-in-inline /Qno-intel-lib") + +# Causes the application to use the multithread, static version of the run-time library (debug version). +set(CMAKE_C_FLAGS_DEBUG "/MTd") +# The /Zi option produces a separate PDB file that contains all the symbolic debugging information for use with the debugger. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Zi") +# Turns off all optimizations in the program and speeds compilation. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Od") +# Debug macro +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /DDEBUG") + +# Causes the application to use the multithread, static version of the run-time library. +set(CMAKE_C_FLAGS_RELEASE "/MT") +# Omits the default C runtime library name from the .obj file. +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zl") +# "Maximize Speed". Selects a predefined set of options that affect the size and speed of generated code. 
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /O3") +# No-debug macro +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /DNDEBUG") + +set(w7_opt "${w7_opt} /arch:SSE2") +set(s8_opt "${s8_opt} /arch:SSSE3") +set(p8_opt "${p8_opt} /arch:SSE4.2 -maes -mpclmul -msha") +set(g9_opt "${g9_opt} /arch:AVX -maes -mpclmul -msha -mrdrnd -mrdseed") +set(h9_opt "${h9_opt} /arch:AVX2 -maes -mpclmul -msha -mrdrnd -mrdseed -mvaes -mvpclmulqdq") +set(m7_opt "${m7_opt} /arch:SSE3") +set(n8_opt "${n8_opt} /arch:SSSE3") +set(y8_opt "${y8_opt} /arch:SSE4.2 -maes -mpclmul -msha") +set(e9_opt "${e9_opt} /arch:AVX -maes -mpclmul -msha -mrdrnd -mrdseed") +set(l9_opt "${l9_opt} /arch:CORE-AVX2 -maes -mpclmul -msha -mrdrnd -mrdseed -mvaes -mvpclmulqdq") +set(n0_opt "${n0_opt} /arch:CORE-AVX2 -maes -mavx512f -mavx512cd -mavx512pf -mavx512er -mpclmul -msha -mrdrnd -mrdseed") +set(k0_opt "${k0_opt} /arch:SKYLAKE-AVX512 -maes -mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mpclmul -mrdrnd -mrdseed -madx") +set(k1_opt "${k1_opt} /arch:ICELAKE-SERVER -maes -mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mpclmul -msha -mrdrnd -mrdseed -madx -mgfni -mvaes -mvpclmulqdq -mavx512vbmi -mavx512vbmi2") diff --git a/sources/dispatcher/gen_disp_lin32.nonpic.py b/sources/dispatcher/gen_disp_lin32.nonpic.py index 457dceb4..a59b6d60 100644 --- a/sources/dispatcher/gen_disp_lin32.nonpic.py +++ b/sources/dispatcher/gen_disp_lin32.nonpic.py @@ -107,7 +107,7 @@ {FunName}: {endbr32} mov eax, dword [ippcpJumpIndexForMergedLibs] - jmp dword [rel arraddr_{FunName} + eax*4] + jmp dword [arraddr_{FunName} + eax*4] .LEnd{FunName}: """.format(FunName=FunName, size=size, endbr32='db 0xf3, 0x0f, 0x1e, 0xfb')) ASMDISP.close() diff --git a/sources/include/fips_cert_internal/bn_common.h b/sources/include/fips_cert_internal/bn_common.h index 6a36dfa8..a2e8dc63 100644 --- a/sources/include/fips_cert_internal/bn_common.h +++ b/sources/include/fips_cert_internal/bn_common.h @@ -31,9 +31,9 @@ * 
\param[in] sgn sign of big number * \param[in] pdata pointer to integer big number * \param[in] data_word_len length of integer big number in 32bit size - * + * */ -__INLINE IppStatus ippcp_init_set_bn(IppsBigNumState *pbn, int max_word_len, +__IPPCP_INLINE IppStatus ippcp_init_set_bn(IppsBigNumState *pbn, int max_word_len, IppsBigNumSGN sgn, const Ipp32u *pdata, int data_word_len) { IppStatus sts; diff --git a/sources/include/ippres.gen b/sources/include/ippres.gen index 0b145050..3ba40923 100644 --- a/sources/include/ippres.gen +++ b/sources/include/ippres.gen @@ -42,7 +42,7 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "CompanyName", "Intel Corporation.\0" - VALUE "FileVersion", STR( VERSION() ) "\0" + VALUE "FileVersion", STR_FILE_VERSION() "\0" VALUE "ProductName", IPP_LIB_SHORTNAME() ". Intel(R) Integrated Performance Primitives. " IPP_LIB_LONGNAME() ".\0" VALUE "ProductVersion", STR_VERSION() "\0" VALUE "LegalCopyright", "Copyright (C) 1999-2021, Intel Corporation. All rights reserved.\0" diff --git a/sources/include/ippver.h b/sources/include/ippver.h index 0b2f5ab8..52d1a278 100644 --- a/sources/include/ippver.h +++ b/sources/include/ippver.h @@ -30,6 +30,10 @@ #define STR2(x) #x #define STR(x) STR2(x) +#ifndef STR_BASE_VERSION +#define STR_BASE_VERSION() STR(IPP_VERSION_MAJOR) "," STR(IPP_VERSION_MINOR) ", " STR(IPP_VERSION_UPDATE) +#endif + #ifndef STR_VERSION #ifdef IPP_REVISION #define STR_VERSION() IPP_VERSION_STR " (r" STR( IPP_REVISION ) ")" diff --git a/sources/include/lms_internal/lmots.h b/sources/include/lms_internal/lmots.h new file mode 100644 index 00000000..bb3f480c --- /dev/null +++ b/sources/include/lms_internal/lmots.h @@ -0,0 +1,156 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#ifndef IPPCP_LMOTS_H_ +#define IPPCP_LMOTS_H_ + +#include "owndefs.h" +#include "pcptool.h" + +#include "stateful_sig_common/common.h" + +/* + * LMOTS algorithms params. "Table 1" LMS spec. + */ +typedef struct { + Ipp32u n; + Ipp32u w; + Ipp32u p; + Ipp32u ls; + IppsHashMethod* hash_method; +} cpLMOTSParams; + +/* + * Standard data format for LMOTS signature + * | 4 bytes || n bytes || n bytes || n bytes ||...|| n bytes | + * | otssigtype || C || Y[0] || Y[1] ||...|| Y[p-1] | + */ +typedef struct { + IppsLMOTSAlgo _lmotsOIDAlgo; + Ipp8u* pC; + Ipp8u* pY; +} _cpLMOTSSignatureState; + +/* + * Set LMOTS parameters + * + * Returns: Reason: + * ippStsBadArgErr lmotsOIDAlgo > Max value for IppsLMOTSAlgo + * lmotsOIDAlgo <= 0 + * ippStsNoErr no errors + * + * Input parameters: + * lmotsOIDAlgo id of LMOTS set of parameters + * + * Output parameters: + * params LMOTS parameters (w, p, ls, n, hash_method) + */ +__IPPCP_INLINE IppStatus setLMOTSParams(IppsLMOTSAlgo lmotsOIDAlgo, cpLMOTSParams* params) { + switch (lmotsOIDAlgo) { + case LMOTS_SHA256_N32_W1: { + params->w = 1; + params->p = 265; + params->ls = 7; + break; + } + case LMOTS_SHA256_N32_W2: { + params->w = 2; + params->p = 133; + params->ls = 6; + break; + } + case LMOTS_SHA256_N32_W4: { + params->w = 4; + params->p = 67; + params->ls = 4; + break; + } + case LMOTS_SHA256_N32_W8: { + params->w = 8; + params->p = 34; + params->ls = 0; + break; + } + case LMOTS_SHA256_N24_W1: { + params->w = 1; + params->p = 200; + params->ls = 
8; + break; + } + case LMOTS_SHA256_N24_W2: { + params->w = 2; + params->p = 101; + params->ls = 6; + break; + } + case LMOTS_SHA256_N24_W4 : { + params->w = 4; + params->p = 51; + params->ls = 4; + break; + } + case LMOTS_SHA256_N24_W8 : { + params->w = 8; + params->p = 26; + params->ls = 0; + break; + } + default: return ippStsBadArgErr; + } + params->hash_method = (IppsHashMethod*) ippsHashMethod_SHA256_TT(); + + if(lmotsOIDAlgo <= LMOTS_SHA256_N32_W8) { + params->n = 32; + } + else { + params->n = 24; + } + return ippStsNoErr; +} + +/* + * f(S, i, w) is the i-th, w-bit value, if S + * is interpreted as a sequence of w-bit values + * + * Input parameters: + * S a string to calculate coef + * i output element position + * w the length of the output element + * + * Output parameters: + * Target element of a specified length + * + */ +__IPPCP_INLINE Ipp32u cpCoef(Ipp8u* S, Ipp32u i, Ipp32u w) { + return ((1 << w) - 1) & ( S[(i * w) / 8] >> (8 - (w * (i % (8 / w)) + w))); +} + +__IPPCP_INLINE Ipp32u cpCksm(Ipp8u* S, cpLMOTSParams lmotsParams) { + Ipp32u w = lmotsParams.w; + Ipp32u n = lmotsParams.n; + Ipp32u ls = lmotsParams.ls; + + Ipp32u cksmQ = 0; //sum is a 16-bit unsigned integer + Ipp32u cksmItrLimit = (8 * n) / w; + for (Ipp32u i = 0; i < cksmItrLimit; i++) { + cksmQ = cksmQ + ((1 << w) - 1) - cpCoef(S, i, w); + } + cksmQ = cksmQ << ls; + + return cksmQ; +} + +#endif /* #ifndef IPPCP_LMOTS_H_ */ diff --git a/sources/include/lms_internal/lms.h b/sources/include/lms_internal/lms.h new file mode 100644 index 00000000..94a341d8 --- /dev/null +++ b/sources/include/lms_internal/lms.h @@ -0,0 +1,112 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#ifndef IPPCP_LMS_H_ +#define IPPCP_LMS_H_ + +#include "owndefs.h" +#include "owncp.h" +#include "lms_internal/lmots.h" + +#define CP_CKSM_BYTESIZE (2) +#define CP_PK_I_BYTESIZE (16) +#define CP_LMS_MAX_HASH_BYTESIZE (32) +#define CP_SIG_MAX_Y_WORDSIZE (265) + +/* Constants used to distinguish hashes in the system */ +#define D_PBLC (0x8080) +#define D_MESG (0x8181) +#define D_LEAF (0x8282) +#define D_INTR (0x8383) + +/* LMS algorithms params. "Table 2" LMS spec. */ +typedef struct { + Ipp32u m; + Ipp32u h; + IppsHashMethod* hash_method; +} cpLMSParams; + +/* + * Standard format of LMS public key: + * | u32str(type) || u32str(otstype) || I || T[1] | + * | 4 bytes || 4 bytes || 16 bytes || n bytes | +*/ +struct _cpLMSPublicKeyState { + Ipp32u _idCtx; // Pub key ctx identifier + IppsLMSAlgo lmsOIDAlgo; + IppsLMOTSAlgo lmotsOIDAlgo; + Ipp8u I[CP_PK_I_BYTESIZE]; + Ipp8u* T1; +}; + +/* + * Standard data format for LMS signature + * | 4 bytes || ... 
|| 4 bytes || n bytes || n bytes ||...|| n bytes | + * | q || lmots_sig || lms_sigtype || path[0] || path[1] ||...|| path[h-1] | + */ +struct _cpLMSSignatureState { + Ipp32u _idCtx; // Signature ctx identifier + Ipp32u _q; + _cpLMOTSSignatureState _lmotsSig; + IppsLMSAlgo _lmsOIDAlgo; + Ipp8u* _pAuthPath; + // path[0] || path[1] ||...|| path[h-1] + // C + // Y[0] || Y[1] ||...|| Y[p-1] +}; + +/* Defines to handle contexts IDs */ +#define CP_LMS_SET_CTX_ID(ctx) ((ctx)->_idCtx = (Ipp32u)idCtxLMS ^ (Ipp32u)IPP_UINT_PTR(ctx)) +#define CP_LMS_VALID_CTX_ID(ctx) ((((ctx)->_idCtx) ^ (Ipp32u)IPP_UINT_PTR(ctx)) == (Ipp32u)idCtxLMS) + +/* + * Set LMS parameters + * + * Returns: Reason: + * ippStsBadArgErr lmsOIDAlgo > Max value for IppsLMSAlgo + * lmsOIDAlgo < Min value for IppsLMSAlgo + * ippStsNoErr no errors + * + * Input parameters: + * lmsOIDAlgo id of LMS set of parameters + * + * Output parameters: + * params LMS parameters (h, m, hash_method) + */ +__IPPCP_INLINE IppStatus setLMSParams(IppsLMSAlgo lmsOIDAlgo, cpLMSParams* params) { + /* Set h */ + switch (lmsOIDAlgo % 5) { + case 0: { params->h = 5; break; } // LMS_SHA256_M32_H5 and LMS_SHA256_M24_H5 + case 1: { params->h = 10; break; } // LMS_SHA256_M32_H10 and LMS_SHA256_M24_H10 + case 2: { params->h = 15; break; } // LMS_SHA256_M32_H15 and LMS_SHA256_M24_H15 + case 3: { params->h = 20; break; } // LMS_SHA256_M32_H20 and LMS_SHA256_M24_H20 + case 4: { params->h = 25; break; } // LMS_SHA256_M32_H25 and LMS_SHA256_M24_H25 + default: return ippStsBadArgErr; + } + + if(lmsOIDAlgo <= LMS_SHA256_M32_H25) { + params->m = 32; + } + else { + params->m = 24; + } + + params->hash_method = (IppsHashMethod*) ippsHashMethod_SHA256_TT(); + + return ippStsNoErr; +} + +#endif /* #ifndef IPPCP_LMS_H_ */ diff --git a/sources/include/owndefs.h b/sources/include/owndefs.h index 7fa1a040..15df89d1 100644 --- a/sources/include/owndefs.h +++ b/sources/include/owndefs.h @@ -34,15 +34,15 @@ #include "ippcpdefs.h" #endif -#if 
!defined(__INLINE) +#if !defined(__IPPCP_INLINE) #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) || defined(_MSC_VER) - #define __INLINE static __inline + #define __IPPCP_INLINE static __inline #elif defined( __GNUC__ ) - #define __INLINE static __inline__ + #define __IPPCP_INLINE static __inline__ #else - #define __INLINE static + #define __IPPCP_INLINE static #endif -#endif /*__INLINE*/ +#endif /*__IPPCP_INLINE*/ /* TODO: to check ICX compiler */ #if !defined(__NOINLINE) @@ -59,7 +59,7 @@ #if defined(_MSC_VER) #define __FORCEINLINE __forceinline #elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) || defined( __GNUC__ ) - #define __FORCEINLINE __INLINE __attribute__((always_inline)) + #define __FORCEINLINE __IPPCP_INLINE __attribute__((always_inline)) #else #define __FORCEINLINE #endif @@ -267,7 +267,7 @@ #endif #if ((_IPP_ARCH == _IPP_ARCH_IA32)) -__INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) +__IPPCP_INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) { union { void* Ptr; @@ -277,7 +277,7 @@ __INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) return dd.Int; } -__INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) { union { void* Ptr; @@ -287,7 +287,7 @@ __INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) return dd.Int; } #elif ((_IPP_ARCH == _IPP_ARCH_EM64T) || (_IPP_ARCH == _IPP_ARCH_LRB2)) -__INLINE Ipp64s IPP_INT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp64s IPP_INT_PTR( const void* ptr ) { union { void* Ptr; @@ -297,7 +297,7 @@ __INLINE Ipp64s IPP_INT_PTR( const void* ptr ) return dd.Int; } -__INLINE Ipp64u IPP_UINT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp64u IPP_UINT_PTR( const void* ptr ) { union { void* Ptr; @@ -386,7 +386,8 @@ typedef enum { idCtxSM3, idCtxAESXTS, idxCtxECES_SM2, - idCtxGFPECKE + idCtxGFPECKE, + idCtxLMS } IppCtxId; diff --git a/sources/include/stateful_sig_common/common.h b/sources/include/stateful_sig_common/common.h new file mode 100644 index 00000000..7f4bf150 --- 
/dev/null +++ b/sources/include/stateful_sig_common/common.h @@ -0,0 +1,34 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* + * Represent the `in` value as the `out` array whose length is `outlen` + * !Works only for big-endian data! + * + * Input parameters: + *     outlen   length of the resulting array + *     in       value that needs to be represented as an array + * Output parameters: + *     out      resulting array of bytes + */ + +__IPPCP_INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { + /* Iterate over out in decreasing order, for big-endianness. */ + for (Ipp32s i = outlen - 1; i >= 0; i--) { + out[i] = (Ipp8u)(in & 0xff); + in = in >> /*bitsize of 1 byte*/ 8; + } +} diff --git a/sources/include/utils.inc b/sources/include/utils.inc index 8d36d1aa..ac8dda6b 100644 --- a/sources/include/utils.inc +++ b/sources/include/utils.inc @@ -58,7 +58,7 @@ ; The macro searches intersection between two lists. ; Input: two comma-separated lists, enclosed in curly braces. ; Output: -; - Intersection will be located in the %$instersection context macro (can be empty). +; - Intersection will be located in the %$intersection context macro (can be empty). ; - Count of intersection elements list will be stored in the %$cardinality context variable.
%macro INTERSECT 2.nolist %ifnctx _INTERSECT_CTX_ diff --git a/sources/include/xmss_internal/wots.h b/sources/include/xmss_internal/wots.h index 25dc197b..89cc8832 100644 --- a/sources/include/xmss_internal/wots.h +++ b/sources/include/xmss_internal/wots.h @@ -20,6 +20,8 @@ #include "owndefs.h" #include "pcptool.h" +#include "stateful_sig_common/common.h" + // WOTS+ algorithms params. See 3.1.1. XMSS spec. typedef struct { Ipp32s n; @@ -60,7 +62,7 @@ IPP_OWN_DECL(IppStatus, WOTS_pkFromSig, (const Ipp8u* M, Ipp8u* sig, Ipp8u* pSee * adrs changed array of bytes */ -__INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ +__IPPCP_INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ adrs[4 * word_id + 3] = (Ipp8u) idx & 0xff; adrs[4 * word_id + 2] = (Ipp8u)(idx >> 8) & 0xff; adrs[4 * word_id + 1] = (Ipp8u)(idx >> 16) & 0xff; @@ -77,28 +79,10 @@ __INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ * word_id int32 idx in the adrs array */ -__INLINE Ipp8u set_adrs_1_byte(int word_id){ +__IPPCP_INLINE Ipp8u set_adrs_1_byte(int word_id){ return (Ipp8u)(4 * word_id + 3); } -/* - * Represent the `in` value as the `out` array that length is `outlen` - * - * Input parameters: - * outlen length of resulted array - * in value that needs to be represent as an array - * Output parameters: - * out resulted array of bytes - */ - -__INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { - /* Iterate over out in decreasing order, for big-endianness. */ - for (Ipp32s i = outlen - 1; i >= 0; i--) { - out[i] = (Ipp8u)(in & 0xff); - in = in >> /*bitsize of 1 byte*/ 8; - } -} - /* * Implement a ceil function that returns the smallest integer greater than or equal to x. 
* @@ -106,7 +90,7 @@ __INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { * x double precision floating point value */ -__INLINE Ipp32s cpCeil(double x) { +__IPPCP_INLINE Ipp32s cpCeil(double x) { Ipp32s int_val = (Ipp32s) x; if(int_val == x || x <= 0.0){ return int_val; diff --git a/sources/include/xmss_internal/xmss.h b/sources/include/xmss_internal/xmss.h index 6145cda7..b765bd74 100644 --- a/sources/include/xmss_internal/xmss.h +++ b/sources/include/xmss_internal/xmss.h @@ -98,7 +98,7 @@ IPP_OWN_DECL(IppStatus, rand_hash, (Ipp8u* left, Ipp8u* right, Ipp8u* seed, * params WOTS parameters (w, log2_w, n, len, len_1, hash_method) */ -__INLINE IppStatus setXMSSParams(IppsXMSSAlgo OIDAlgo, Ipp32s* h, cpWOTSParams* params) { +__IPPCP_INLINE IppStatus setXMSSParams(IppsXMSSAlgo OIDAlgo, Ipp32s* h, cpWOTSParams* params) { // Digits below are from the XMSS algo spec // don't depend on the algo diff --git a/sources/ippcp/CMakeLists.txt b/sources/ippcp/CMakeLists.txt index 82389d48..18c9be24 100644 --- a/sources/ippcp/CMakeLists.txt +++ b/sources/ippcp/CMakeLists.txt @@ -184,7 +184,7 @@ if(IPP_REVISION) endif() # Enable tech-preview feature in the library -set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -DIPPCP_PREVIEW_XMSS") +set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -DIPPCP_PREVIEW_XMSS -DIPPCP_PREVIEW_LMS") set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -D_NO_IPP_DEPRECATED") # do not warn about ippcp deprecated functions # set BN_OPENSSL_DISABLE for Intel IPP Cryptography @@ -232,6 +232,7 @@ file(GLOB LIBRARY_C_SOURCES_ORIGINAL ${IPP_CRYPTO_SOURCES_DIR}/ecnist/*.c ${IPP_CRYPTO_SOURCES_DIR}/sm2/*.c ${IPP_CRYPTO_SOURCES_DIR}/xmss/*.c + ${IPP_CRYPTO_SOURCES_DIR}/lms/*.c ) file(GLOB LIBRARY_ASM_SOURCES_ORIGINAL diff --git a/sources/ippcp/aes_gcm_avx512_structures.h b/sources/ippcp/aes_gcm_avx512_structures.h index 9be2fbe6..cc16b8f3 100644 --- a/sources/ippcp/aes_gcm_avx512_structures.h +++ b/sources/ippcp/aes_gcm_avx512_structures.h @@ -14,14 +14,14 @@ * limitations under the 
License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. -// AES GCM otimized for AVX512 and AVX512-VAES features +// AES GCM optimized for AVX512 and AVX512-VAES features // Internal Definitions -// -// +// +// */ #ifndef __AES_GCM_AVX512_STRUCTURES_H_ diff --git a/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm b/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm index 65050014..ad10136c 100644 --- a/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm +++ b/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm @@ -170,6 +170,91 @@ IPPASM AesGcmPrecompute_avx,PUBLIC ret ENDFUNC AesGcmPrecompute_avx +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; void AesGcmPrecompute_avx2_vaes(const Ipp8u* pRefHkey, Ipp8u* pMultipliers); +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +align IPP_ALIGN_FACTOR +IPPASM AesGcmPrecompute_avx2_vaes,PUBLIC + USES_GPR esi + +%xdefine pHkey [esp + ARG_1 + 0*sizeof(dword)] ; pointer to the reflected hkey +%xdefine pMultipliers [esp + ARG_1 + 1*sizeof(dword)] ; output to the precomputed multipliers + + LD_ADDR esi, CONST_TABLE + + mov eax, pHkey + movdqu xmm0, oword [eax] ; xmm0 holds HashKey + pshufb xmm0, u128_str + + ; precompute HashKey<<1 mod poly from the HashKey + movdqa xmm4, xmm0 + psllq xmm0, 1 + psrlq xmm4, 63 + movdqa xmm3, xmm4 + pslldq xmm4, 8 + psrldq xmm3, 8 + por xmm0, xmm4 + ;reduction + pshufd xmm4, xmm3, 00100100b + pcmpeqd xmm4, oword TWOONE ; TWOONE = 0x00000001000000000000000000000001 + pand xmm4, oword POLY + pxor xmm0, xmm4 ; xmm0 holds the HashKey<<1 mod poly + + mov eax, pMultipliers + movdqu oword [eax+sizeof_oword_*0], xmm0 + + movdqa xmm1, xmm0 + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^2)<<1 mod poly + movdqu oword [eax+sizeof_oword_*1], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^3)<<1 mod poly + movdqu oword 
[eax+sizeof_oword_*2], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^4)<<1 mod poly + movdqu oword [eax+sizeof_oword_*3], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^5)<<1 mod poly + movdqu oword [eax+sizeof_oword_*4], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^6)<<1 mod poly + movdqu oword [eax+sizeof_oword_*5], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^7)<<1 mod poly + movdqu oword [eax+sizeof_oword_*6], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^8)<<1 mod poly + movdqu oword [eax+sizeof_oword_*7], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^9)<<1 mod poly + movdqu oword [eax+sizeof_oword_*8], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^10)<<1 mod poly + movdqu oword [eax+sizeof_oword_*9], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^11)<<1 mod poly + movdqu oword [eax+sizeof_oword_*10], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^12)<<1 mod poly + movdqu oword [eax+sizeof_oword_*11], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^13)<<1 mod poly + movdqu oword [eax+sizeof_oword_*12], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^14)<<1 mod poly + movdqu oword [eax+sizeof_oword_*13], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^15)<<1 mod poly + movdqu oword [eax+sizeof_oword_*14], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^16)<<1 mod poly + movdqu oword [eax+sizeof_oword_*15], xmm1 + + REST_GPR + ret +ENDFUNC AesGcmPrecompute_avx2_vaes + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; diff --git a/sources/ippcp/asm_ia32/pcpbnu.inc b/sources/ippcp/asm_ia32/pcpbnu.inc index 81a171cf..1bbefcc9 100644 --- a/sources/ippcp/asm_ia32/pcpbnu.inc 
+++ b/sources/ippcp/asm_ia32/pcpbnu.inc @@ -29,7 +29,7 @@ ; ;; -;; CMP_BNU comare arbitrary BNUs +;; CMP_BNU compare arbitrary BNUs ;; ;; input ;; rSrc1 points BNU1 diff --git a/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm b/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm index 3a6460d2..f57be534 100644 --- a/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm +++ b/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm @@ -311,7 +311,7 @@ IPPASM Safe2Decrypt_RIJ128,PUBLIC mov ebp, esp ; save original esp to use it to reach parameters %xdefine pInp [ebp + ARG_1 + 0*sizeof(dword)] ; input buffer -%xdefine pOut [ebp + ARG_1 + 1*sizeof(dword)] ; outpu buffer +%xdefine pOut [ebp + ARG_1 + 1*sizeof(dword)] ; output buffer %xdefine nrounds [ebp + ARG_1 + 2*sizeof(dword)] ; number of rounds %xdefine pRK [ebp + ARG_1 + 3*sizeof(dword)] ; round keys %xdefine pSbox [ebp + ARG_1 + 4*sizeof(dword)] ; S-box diff --git a/sources/ippcp/asm_ia32/pcpvariant.inc b/sources/ippcp/asm_ia32/pcpvariant.inc index b458b867..64b5c32e 100644 --- a/sources/ippcp/asm_ia32/pcpvariant.inc +++ b/sources/ippcp/asm_ia32/pcpvariant.inc @@ -44,7 +44,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; it possible to force use of C-version of some implementtaions +;; it possible to force use of C-version of some implementations ;; instead of ASM one ;; %ifndef _USE_C_cpAdd_BNU_ diff --git a/sources/ippcp/asm_intel64/pcpaesgcme9as.asm b/sources/ippcp/asm_intel64/pcpaesgcme9as.asm index c80be6aa..62384c3b 100644 --- a/sources/ippcp/asm_intel64/pcpaesgcme9as.asm +++ b/sources/ippcp/asm_intel64/pcpaesgcme9as.asm @@ -221,7 +221,7 @@ my_pclmulqdq %%tmpX1, %%HK, 11h ;; tmpX1 = a1*b1 xmm1 = GH %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR POLY DQ 00000000000000001h,0C200000000000000h ;; 0xC2000000000000000000000000000001 @@ -231,11 +231,12 @@ MASK1 DQ 0ffffffffffffffffh,00000000000000000h ;; 0x0000000000000000ff MASK2 
DQ 00000000000000000h,0ffffffffffffffffh ;; 0xffffffffffffffff0000000000000000 INC_1 DQ 1,0 +segment .text align=IPP_ALIGN_FACTOR %assign sizeof_oword_ (16) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; -;;; void AesGcmPrecomute_avx(Ipp8u* pPrecomutedData, const Ipp8u* pHKey) +;;; void AesGcmPrecomute_avx(Ipp8u* pPrecomData, const Ipp8u* pHKey) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; align IPP_ALIGN_FACTOR @@ -247,7 +248,7 @@ IPPASM AesGcmPrecompute_avx,PUBLIC %xdefine pPrecomData rdi ; (rdi) pointer to the reflected multipliers reflect(hkey),(hkey<<1), (hkey^2)<<1, (hkey^4)<<1, %xdefine pHKey rsi ; (rsi) pointer to the Hkey value - movdqu xmm0, oword [rel pHKey] ; xmm0 holds HashKey + movdqu xmm0, oword [pHKey] ; xmm0 holds HashKey pshufb xmm0, [rel SHUF_CONST] ;movdqu oword [pPrecomData+sizeof_oword_*0], xmm0 @@ -280,6 +281,92 @@ IPPASM AesGcmPrecompute_avx,PUBLIC ret ENDFUNC AesGcmPrecompute_avx +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; void AesGcmPrecompute_avx2_vaes(Ipp8u* pPrecomputedData, const Ipp8u* pHKey) +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; calculates 16 hash keys - HashKey<<1, (HashKey^2)<<1, ..., (HashKey^16)<<1 +align IPP_ALIGN_FACTOR +IPPASM AesGcmPrecompute_avx2_vaes,PUBLIC + USES_GPR rdi,rsi + USES_XMM xmm6,xmm7,xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15 + COMP_ABI 2 + +%xdefine pPrecomputedData rdi ; (rdi) pointer to the reflected multipliers reflect(hkey),(hkey<<1), (hkey^2)<<1, (hkey^4)<<1, +%xdefine pHKey rsi ; (rsi) pointer to the Hkey value + + movdqu xmm0, oword [pHKey] ; xmm0 holds HashKey + pshufb xmm0, [rel SHUF_CONST] + + ; precompute HashKey<<1 mod poly from the HashKey + movdqa xmm4, xmm0 + psllq xmm0, 1 + psrlq xmm4, 63 + movdqa xmm3, xmm4 + pslldq xmm4, 8 + psrldq xmm3, 8 + por xmm0, xmm4 + ;reduction + pshufd 
xmm4, xmm3, 00100100b + pcmpeqd xmm4, oword [rel TWOONE] ; [TWOONE] = 0x00000001000000000000000000000001 + pand xmm4, oword [rel POLY] + pxor xmm0, xmm4 ; xmm0 holds the HashKey<<1 mod poly + + movdqu oword [pPrecomputedData+sizeof_oword_*0], xmm0 + + movdqa xmm1, xmm0 + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^2)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*1], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^3)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*2], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^4)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*3], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^5)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*4], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^6)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*5], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^7)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*6], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^8)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*7], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^9)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*8], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^10)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*9], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^11)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*10], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^12)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*11], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^13)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*12], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, 
xmm5 ; xmm1 holds (HashKey^14)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*13], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^15)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*14], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^16)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*15], xmm1 + + REST_XMM + REST_GPR + ret +ENDFUNC AesGcmPrecompute_avx2_vaes + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; void AesGcmMulGcm_avx(Ipp8u* pHash, const Ipp8u* pHKey) @@ -294,14 +381,14 @@ IPPASM AesGcmMulGcm_avx,PUBLIC %xdefine pHash rdi ; (rdi) pointer to the Hash value %xdefine pHKey rsi ; (rsi) pointer to the (hkey<<1) value - movdqa xmm0, oword [rel pHash] + movdqa xmm0, oword [pHash] pshufb xmm0, [rel SHUF_CONST] - movdqa xmm1, oword [rel pHKey] + movdqa xmm1, oword [pHKey] sse_clmul_gcm xmm0, xmm1, xmm2, xmm3, xmm4 ; xmm0 holds Hash*HKey mod poly pshufb xmm0, [rel SHUF_CONST] - movdqa oword [rel pHash], xmm0 + movdqa oword [pHash], xmm0 REST_XMM REST_GPR @@ -326,15 +413,15 @@ IPPASM AesGcmAuth_avx,PUBLIC %assign BYTES_PER_BLK (16) - movdqa xmm0, oword [rel pHash] + movdqa xmm0, oword [pHash] pshufb xmm0, [rel SHUF_CONST] - movdqa xmm1, oword [rel pHKey] + movdqa xmm1, oword [pHKey] movsxd rdx, edx align IPP_ALIGN_FACTOR .auth_loop: - movdqu xmm2, oword [rel pSrc] ; src[] + movdqu xmm2, oword [pSrc] ; src[] pshufb xmm2, [rel SHUF_CONST] add pSrc, BYTES_PER_BLK pxor xmm0, xmm2 ; hash ^= src[] diff --git a/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm b/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm index a9674477..9a9139a5 100644 --- a/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm +++ b/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm @@ -30,7 +30,7 @@ %if (_IPP32E >= _IPP32E_U8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR ; ; getAesGcmConst_table_ct provides c-e-t access to 
pre-computed Ipp16u AesGcmConst_table[256] @@ -49,6 +49,7 @@ align IPP_ALIGN_FACTOR INIT_IDX dw 000h,001h,002h,003h,004h,005h,006h,007h ;; initial search inx = {0:1:2:3:4:5:6:7} INCR_IDX dw 008h,008h,008h,008h,008h,008h,008h,008h ;; index increment = {8:8:8:8:8:8:8:8} +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM getAesGcmConst_table_ct,PRIVATE pxor xmm2, xmm2 ;; accumulator xmm2 = 0 diff --git a/sources/ippcp/asm_intel64/pcpbnum7.inc b/sources/ippcp/asm_intel64/pcpbnum7.inc index acd6ff93..9657f25e 100644 --- a/sources/ippcp/asm_intel64/pcpbnum7.inc +++ b/sources/ippcp/asm_intel64/pcpbnum7.inc @@ -111,7 +111,7 @@ %endmacro ;; -;; CMP_BNU comare BNUs +;; CMP_BNU compare BNUs ;; ;; input ;; rSrc1 points BNU1 diff --git a/sources/ippcp/asm_intel64/pcpmred.inc b/sources/ippcp/asm_intel64/pcpmred.inc index 5bbc8e4a..bbbd1adc 100644 --- a/sources/ippcp/asm_intel64/pcpmred.inc +++ b/sources/ippcp/asm_intel64/pcpmred.inc @@ -474,7 +474,7 @@ DECLARE_FUNC mred_N,PRIVATE lea rdi, [rdi+rdx*sizeof(qword)] - ; accumulate carryLCL and update hight product above + ; accumulate carryLCL and update high product above pop rax shr rax, 1 mov rbx, rdx @@ -502,7 +502,7 @@ DECLARE_FUNC mred_N,PRIVATE pop rbp ; mul_8xn procedure - ; accumulate carryGBL and store hight product above + ; accumulate carryGBL and store high product above pop rbx ; carryGBL add r8, rbx mov qword [rdi+sizeof(qword)*0], r8 diff --git a/sources/ippcp/asm_intel64/pcprij128ccme9as.asm b/sources/ippcp/asm_intel64/pcprij128ccme9as.asm index 2198333a..450c9b31 100644 --- a/sources/ippcp/asm_intel64/pcprij128ccme9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128ccme9as.asm @@ -31,12 +31,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR u128_str DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 increment DQ 1,0 +segment .text 
align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: Authenticate and Encrypt ;* diff --git a/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm b/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm index 1fb20d81..c0e93766 100644 --- a/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm @@ -31,12 +31,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR u128_str DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +segment .text align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: pipelined RIJ128 CTR encryption/decryption ;* diff --git a/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm b/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm index f86abb2e..1e2d0471 100644 --- a/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm @@ -30,13 +30,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ALPHA_MUL_CNT dq 00000000000000087h, 00000000000000001h - +segment .text align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: AES-XTS encryption ;* diff --git a/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm b/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm index 0648b82b..c6c44880 100644 --- a/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm +++ b/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm @@ -107,9 +107,7 @@ PLOOKUP_MEM %%xmmB, %%xmmT, GF16_expTbl %endmacro -segment .text align=IPP_ALIGN_FACTOR - - +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR 
DECODE_DATA: @@ -268,7 +266,7 @@ GF16mul_2_6x \ ColumnROR \ DB 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12 - +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;************************************************************* ;* void SafeDecrypt_RIJ128(const Ipp8u* pInpBlk, diff --git a/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm b/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm index 5d3827b9..d75f112b 100644 --- a/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm +++ b/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm @@ -107,7 +107,7 @@ PLOOKUP_MEM %%xmmB, %%xmmT, GF16_expTbl %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -256,6 +256,7 @@ ColumnROR \ DB 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12 +segment .text align=IPP_ALIGN_FACTOR ;************************************************************* ; convert GF(2^128) -> GF((2^4)^2) ;************************************************************* diff --git a/sources/ippcp/asm_intel64/pcpsha1e9as.asm b/sources/ippcp/asm_intel64/pcpsha1e9as.asm index 164fa7c3..badb442a 100644 --- a/sources/ippcp/asm_intel64/pcpsha1e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1e9as.asm @@ -290,7 +290,7 @@ mov %%hash0, %%hashAdd %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -305,6 +305,7 @@ shuffle_mask DD 00010203h DD 08090a0bh DD 0c0d0e0fh +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha1l9as.asm b/sources/ippcp/asm_intel64/pcpsha1l9as.asm index 6ab1b1c1..14b9fdbd 100644 --- a/sources/ippcp/asm_intel64/pcpsha1l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1l9as.asm @@ -331,7 +331,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data 
align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -344,7 +344,7 @@ SHA1_YMM_K dd 05a827999h, 05a827999h, 05a827999h, 05a827999h, 05a827999h, 05 SHA1_YMM_BF dd 00010203h,04050607h,08090a0bh,0c0d0e0fh dd 00010203h,04050607h,08090a0bh,0c0d0e0fh - +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* @@ -365,7 +365,7 @@ IPPASM UpdateSHA1,PUBLIC %xdefine MBS_SHA1 (64) - mov r15, rsp ; store orifinal rsp + mov r15, rsp ; store original rsp and rsp, -IPP_ALIGN_FACTOR ; 32-byte aligned stack movsxd r14, edx ; input length in bytes diff --git a/sources/ippcp/asm_intel64/pcpsha1m7as.asm b/sources/ippcp/asm_intel64/pcpsha1m7as.asm index dee0e7d1..d3e4cea9 100644 --- a/sources/ippcp/asm_intel64/pcpsha1m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1m7as.asm @@ -258,9 +258,9 @@ mov [rsp+(%%nr & 0Fh)*4],%%regU %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha1nias.asm b/sources/ippcp/asm_intel64/pcpsha1nias.asm index 412cbea4..5d2f21b7 100644 --- a/sources/ippcp/asm_intel64/pcpsha1nias.asm +++ b/sources/ippcp/asm_intel64/pcpsha1nias.asm @@ -31,8 +31,7 @@ %if (_SHA_NI_ENABLING_ == _FEATURE_ON_) || (_SHA_NI_ENABLING_ == _FEATURE_TICKTOCK_) ;;%if (_IPP32E >= _IPP32E_Y8 ) -segment .text align=IPP_ALIGN_FACTOR - +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -41,6 +40,7 @@ UPPER_DWORD_MASK \ PSHUFFLE_BYTE_FLIP_MASK \ DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block diff --git 
a/sources/ippcp/asm_intel64/pcpsha1u8as.asm b/sources/ippcp/asm_intel64/pcpsha1u8as.asm index 58eccb4f..08355cad 100644 --- a/sources/ippcp/asm_intel64/pcpsha1u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1u8as.asm @@ -303,7 +303,7 @@ mov %%hash0, %%hashAdd %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -319,6 +319,7 @@ shuffle_mask DD 00010203h DD 0c0d0e0fh +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha256e9as.asm b/sources/ippcp/asm_intel64/pcpsha256e9as.asm index ba656048..54bcd0c4 100644 --- a/sources/ippcp/asm_intel64/pcpsha256e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256e9as.asm @@ -281,7 +281,7 @@ %endif %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -289,6 +289,7 @@ SHUFB_BSWAP DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 SHUFD_ZZ10 DB 0,1,2,3, 8,9,10,11, 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh SHUFD_32ZZ DB 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh, 0,1,2,3, 8,9,10,11 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha256l9as.asm b/sources/ippcp/asm_intel64/pcpsha256l9as.asm index 2496f343..89a6a610 100644 --- a/sources/ippcp/asm_intel64/pcpsha256l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256l9as.asm @@ -391,7 +391,7 @@ vmovdqa YMMWORD [rsi+(%%W_AHEAD/4)*sizeof(ymmword)+(%%round/4)*sizeof(ymmword)] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -421,6 +421,7 @@ SHA256_zzBA db 0,1,2,3, 8,9,10,11, 
0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh db 0,1,2,3, 8,9,10,11, 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha256m7as.asm b/sources/ippcp/asm_intel64/pcpsha256m7as.asm index a8f9643d..b60fe3e8 100644 --- a/sources/ippcp/asm_intel64/pcpsha256m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256m7as.asm @@ -161,7 +161,6 @@ mov [rsp+((%%nr-16) & 0Fh)*4], %%sig0 %endmacro -segment .text align=IPP_ALIGN_FACTOR ;****************************************************************************************** @@ -191,12 +190,13 @@ segment .text align=IPP_ALIGN_FACTOR ;; Caller = ippsHMACSHA224MessageDigest ;; - +segment .data align=IPP_ALIGN_FACTOR %if (_IPP32E >= _IPP32E_U8) align IPP_ALIGN_FACTOR pByteSwp DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 %endif +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM UpdateSHA256,PUBLIC %assign LOCAL_FRAME (16*sizeof(dword) + sizeof(qword)) diff --git a/sources/ippcp/asm_intel64/pcpsha256nias.asm b/sources/ippcp/asm_intel64/pcpsha256nias.asm index 8c47ab9c..e2d3838a 100644 --- a/sources/ippcp/asm_intel64/pcpsha256nias.asm +++ b/sources/ippcp/asm_intel64/pcpsha256nias.asm @@ -31,13 +31,14 @@ %if (_SHA_NI_ENABLING_ == _FEATURE_ON_) || (_SHA_NI_ENABLING_ == _FEATURE_TICKTOCK_) ;;%if (_IPP32E >= _IPP32E_Y8 ) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR PSHUFFLE_BYTE_FLIP_MASK \ DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block diff --git a/sources/ippcp/asm_intel64/pcpsha256u8as.asm 
b/sources/ippcp/asm_intel64/pcpsha256u8as.asm index d2920547..7df1ab13 100644 --- a/sources/ippcp/asm_intel64/pcpsha256u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256u8as.asm @@ -271,12 +271,13 @@ movdqa %%xS, %%xS0 %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR pByteSwp DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha512e9as.asm b/sources/ippcp/asm_intel64/pcpsha512e9as.asm index a260b9e3..b28a869c 100644 --- a/sources/ippcp/asm_intel64/pcpsha512e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512e9as.asm @@ -466,11 +466,12 @@ ROTATE_W %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR SHUFB_BSWAP DB 7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA512(Ipp64u digest[], Ipp8u dataBlock[], int datalen, Ipp64u K_512[]) diff --git a/sources/ippcp/asm_intel64/pcpsha512l9as.asm b/sources/ippcp/asm_intel64/pcpsha512l9as.asm index d5641e03..51abc748 100644 --- a/sources/ippcp/asm_intel64/pcpsha512l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512l9as.asm @@ -354,7 +354,7 @@ vmovdqa YMMWORD [rsi+(%%W_AHEAD/2)*sizeof(ymmword)+(%%nr/2)*sizeof(ymmword)],yT ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -409,6 +409,7 @@ SHA512_YMM_BF dq 00001020304050607h, 008090a0b0c0d0e0fh, 00001020304050607h, 00 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR 
IPPASM UpdateSHA512,PUBLIC %assign LOCAL_FRAME (sizeof(qword)*4 + sizeof(qword)*80*2) diff --git a/sources/ippcp/asm_intel64/pcpsha512m7as.asm b/sources/ippcp/asm_intel64/pcpsha512m7as.asm index 84be6606..f17bbcb8 100644 --- a/sources/ippcp/asm_intel64/pcpsha512m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512m7as.asm @@ -208,10 +208,12 @@ segment .text align=IPP_ALIGN_FACTOR %xdefine KK_SHA512 rbp %if _IPP32E >= _IPP32E_U8 +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR pByteSwp DB 7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8 %endif +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM UpdateSHA512,PUBLIC %assign LOCAL_FRAME (16*sizeof(qword)+sizeof(qword)) diff --git a/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm b/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm index 1c3eeb13..97bb741c 100644 --- a/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm +++ b/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm @@ -28,7 +28,7 @@ %assign _xEMULATION_ 1 %assign _ADCX_ADOX_ 1 -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -42,7 +42,7 @@ LOne DD 1,1,1,1,1,1,1,1 LTwo DD 2,2,2,2,2,2,2,2 LThree DD 3,3,3,3,3,3,3,3 - +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; void sm2_mul_by_2(uint64_t res[4], uint64_t a[4]); diff --git a/sources/ippcp/asm_intel64/pcpsm3e9as.asm b/sources/ippcp/asm_intel64/pcpsm3e9as.asm index 02cead2e..b3eb3139 100644 --- a/sources/ippcp/asm_intel64/pcpsm3e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsm3e9as.asm @@ -687,7 +687,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;; %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -697,6 +697,7 @@ rol_32_8 DB 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 bcast DB 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 wzzz DB 80h,80h,80h,80h, 80h,80h,80h,80h, 80h,80h,80h,80h,12,13,14,15 +segment .text align=IPP_ALIGN_FACTOR 
;******************************************************************** ;* void UpdateSM3(Ipp32u* hash, ; const Ipp8u* msg, int msgLen, diff --git a/sources/ippcp/asm_intel64/pcpsm3u8as.asm b/sources/ippcp/asm_intel64/pcpsm3u8as.asm index 495b8b8c..1e747c37 100644 --- a/sources/ippcp/asm_intel64/pcpsm3u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsm3u8as.asm @@ -195,12 +195,13 @@ mov [rsp+ctr*sizeof(dword)+%%i*sizeof(dword)], t3 %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR bswap128 DB 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR ;******************************************************************** ;* void UpdateSM3(uint32_t hash[8], ; const uint32_t msg[16], int msgLen, diff --git a/sources/ippcp/asm_intel64/pcpvariant.inc b/sources/ippcp/asm_intel64/pcpvariant.inc index f13fc807..528646ca 100644 --- a/sources/ippcp/asm_intel64/pcpvariant.inc +++ b/sources/ippcp/asm_intel64/pcpvariant.inc @@ -44,7 +44,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; it possible to force use of C-version of some implementtaions +;; it possible to force use of C-version of some implementations ;; instead of ASM one ;; %ifndef _USE_C_cpAdd_BNU_ diff --git a/sources/ippcp/crypto_mb/Readme.md b/sources/ippcp/crypto_mb/Readme.md index 72f6835b..a7958326 100644 --- a/sources/ippcp/crypto_mb/Readme.md +++ b/sources/ippcp/crypto_mb/Readme.md @@ -1,7 +1,7 @@ # Crypto Multi-buffer Library Currently, the library provides optimized version of the following algorithms: -1. RSA, ECDSA, ECDH, x25519, SM2 multi-buffer algorithms based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) integer fused multiply-add (IFMA) operations. This CPU feature is introduced with Intel® Microarchitecture Code Named Ice Lake. +1. RSA, ECDSA, ECDH, x25519, SM2 multi-buffer algorithms based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) integer fused multiply-add (IFMA) operations. 
This CPU feature is introduced with Intel® Microarchitecture Code Named Ice Lake. 2. SM4 based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) GFNI instructions. 3. SM3 based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) instructions. @@ -32,6 +32,7 @@ This library consists of highly-optimized kernels taking advantage of Intel’s - GCC 11.4 - Clang 9.0 - Clang 12.0 +- Clang 16.0 - GNU binutils 2.32 ### Windows* OS @@ -96,11 +97,11 @@ You can find the installed files in: ``` > **Note**: This project uses the default `RPATH` settings: > -> CMake is linking the executables and shared libraries with full `RPATH` to all used -> libraries in the build tree. When installing, CMake will clear the `RPATH` of these -> targets so they are installed with an empty `RPATH`. -> In this case to resolve the Crypto Multi-buffer Library dependency on OpenSSL it is -> necessary to update `LD_LIBRARY_PATH` with the path to the target OpenSSL library. +> CMake is linking the executables and shared libraries with full `RPATH` to all used +> libraries in the build tree. When installing, CMake will clear the `RPATH` of these +> targets so they are installed with an empty `RPATH`. +> In this case to resolve the Crypto Multi-buffer Library dependency on OpenSSL it is +> necessary to update `LD_LIBRARY_PATH` with the path to the target OpenSSL library. 
## How to Build diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/defs.h b/sources/ippcp/crypto_mb/include/crypto_mb/defs.h index 085376ae..a52516ab 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/defs.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/defs.h @@ -33,8 +33,8 @@ typedef unsigned long long int64u; #define __ALIGN64 __attribute__((aligned(64))) #endif - #if !defined(__INLINE) - #define __INLINE static __inline__ + #if !defined(__MBX_INLINE) + #define __MBX_INLINE static __inline__ #endif #if !defined(__NOINLINE) @@ -45,8 +45,8 @@ typedef unsigned long long int64u; #define __ALIGN64 __declspec(align(64)) #endif - #if !defined(__INLINE) - #define __INLINE static __forceinline + #if !defined(__MBX_INLINE) + #define __MBX_INLINE static __forceinline #endif #if !defined(__NOINLINE) diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h b/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h index 5a06286b..0f6461fe 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h @@ -109,7 +109,7 @@ EXTERN_C fips_test_status fips_selftest_mbx_rsa4k_private_crt_ssl_mb8(void); #endif // BN_OPEN_SSL_DISABLE -/* +/* // Enumerator that contains information about FIPS-approved // functions inside the crypto_mb cryptographic boundary */ @@ -138,11 +138,11 @@ enum FIPS_CRYPTO_MB_FUNC { nistp256_ecdsa_verify_mb8, nistp384_ecdsa_verify_mb8, nistp521_ecdsa_verify_mb8, - + ed25519_public_key_mb8, ed25519_sign_mb8, ed25519_verify_mb8, - + rsa_public_mb8, rsa_private_mb8, rsa_private_crt_mb8, @@ -170,7 +170,7 @@ enum FIPS_CRYPTO_MB_FUNC { nistp256_ecdsa_verify_ssl_mb8, nistp384_ecdsa_verify_ssl_mb8, nistp521_ecdsa_verify_ssl_mb8, - + rsa_public_ssl_mb8, rsa_private_ssl_mb8, rsa_private_crt_ssl_mb8, @@ -184,7 +184,7 @@ enum FIPS_CRYPTO_MB_FUNC { x25519_public_key_mb8, x25519_mb8, - + sm2_ecpublic_key_mb8, sm2_ecdh_mb8, sm2_ecdsa_sign_mb8, @@ -233,20 +233,20 @@ enum FIPS_CRYPTO_MB_FUNC { 
/** * \brief * - * An indicator if a function is FIPS-approved or not - * - * \param[in] function member of FIPS_CRYPTO_MB_FUNC enumerator + * An indicator if a function is FIPS-approved or not + * + * \param[in] function member of FIPS_CRYPTO_MB_FUNC enumerator * that corresponds to API being checked. * \return func_fips_approved equal to 1 if FIPS-approved algorithm is used * - * Example: - * Library API FIPS_CRYPTO_MB_FUNC + * Example: + * Library API FIPS_CRYPTO_MB_FUNC * mbx_rsa_public_mb8 -> rsa_public_mb8 * mbx_nistp256_ecdh_mb8 -> nistp256_ecdh_mb8 * mbx_ -> - * + * */ -__INLINE func_fips_approved mbx_is_fips_approved_func(enum FIPS_CRYPTO_MB_FUNC function) +__MBX_INLINE func_fips_approved mbx_is_fips_approved_func(enum FIPS_CRYPTO_MB_FUNC function) { return ((int)function > 0); } diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/status.h b/sources/ippcp/crypto_mb/include/crypto_mb/status.h index 16b395cd..7d5f3722 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/status.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/status.h @@ -29,23 +29,23 @@ typedef int64u mbx_status16; #define MBX_STATUS_LOW_ORDER_ERR (4) #define MBX_STATUS_SIGNATURE_ERR (8) -__INLINE mbx_status MBX_SET_STS(mbx_status status, int numb, mbx_status sttVal) +__MBX_INLINE mbx_status MBX_SET_STS(mbx_status status, int numb, mbx_status sttVal) { numb &= 7; /* 0 <= numb < 8 */ status &= (mbx_status)(~(0xF << (numb*4))); return status |= (sttVal & 0xF) << (numb*4); } -__INLINE mbx_status MBX_GET_STS(mbx_status status, int numb) +__MBX_INLINE mbx_status MBX_GET_STS(mbx_status status, int numb) { return (status >>(numb*4)) & 0xF; } -__INLINE mbx_status MBX_SET_STS_ALL(mbx_status stsVal) +__MBX_INLINE mbx_status MBX_SET_STS_ALL(mbx_status stsVal) { return (stsVal<<4*7) | (stsVal<<4*6) | (stsVal<<4*5) | (stsVal<<4*4) | (stsVal<<4*3) | (stsVal<<4*2) | (stsVal<<4*1) | stsVal; } -__INLINE mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_status sttVal) +__MBX_INLINE 
mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_status sttVal) { int numb; @@ -56,7 +56,7 @@ __INLINE mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_statu return status; } -__INLINE int MBX_IS_ANY_OK_STS(mbx_status status) +__MBX_INLINE int MBX_IS_ANY_OK_STS(mbx_status status) { int ret = MBX_STATUS_OK==MBX_GET_STS(status, 0) || MBX_STATUS_OK==MBX_GET_STS(status, 1) @@ -74,30 +74,30 @@ __INLINE int MBX_IS_ANY_OK_STS(mbx_status status) */ /* Accessors for the low and high part of 64-bit status */ -__INLINE mbx_status MBX_GET_HIGH_PART_STS16(mbx_status16 status16) +__MBX_INLINE mbx_status MBX_GET_HIGH_PART_STS16(mbx_status16 status16) { return ((mbx_status)(((mbx_status16)(status16) >> 32) & 0xFFFFFFFF)); } -__INLINE mbx_status MBX_GET_LOW_PART_STS16(mbx_status16 status16) +__MBX_INLINE mbx_status MBX_GET_LOW_PART_STS16(mbx_status16 status16) { return ((mbx_status)(status16)); } -__INLINE mbx_status16 MBX_SET_STS16_ALL(mbx_status16 stsVal) +__MBX_INLINE mbx_status16 MBX_SET_STS16_ALL(mbx_status16 stsVal) { return (stsVal<<4*15) | (stsVal<<4*14) | (stsVal<<4*13) | (stsVal<<4*12) | (stsVal<<4*11) | (stsVal<<4*10) | (stsVal<<4*9) | (stsVal<<4*8) | \ (stsVal<<4*7) | (stsVal<<4*6) | (stsVal<<4*5) | (stsVal<<4*4) | (stsVal<<4*3) | (stsVal<<4*2) | (stsVal<<4*1) | stsVal; } - -__INLINE mbx_status16 MBX_SET_STS16(mbx_status16 status, int numb, mbx_status16 sttVal) + +__MBX_INLINE mbx_status16 MBX_SET_STS16(mbx_status16 status, int numb, mbx_status16 sttVal) { numb &= 15; /* 0 <= numb < 16 */ status &= (mbx_status16)(~((int64u)0xF << (numb*4))); return status |= (sttVal & 0xF) << (numb*4); } -__INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mbx_status16 sttVal) +__MBX_INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mbx_status16 sttVal) { int numb; for (numb = 0; numb < 16; numb++) { @@ -107,7 +107,7 @@ __INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mb 
return status; } -__INLINE int MBX_IS_ANY_OK_STS16(mbx_status16 status) +__MBX_INLINE int MBX_IS_ANY_OK_STS16(mbx_status16 status) { return MBX_IS_ANY_OK_STS(MBX_GET_HIGH_PART_STS16(status)) || \ MBX_IS_ANY_OK_STS(MBX_GET_LOW_PART_STS16(status)); diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/version.h b/sources/ippcp/crypto_mb/include/crypto_mb/version.h index 646111ed..e9927c28 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/version.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/version.h @@ -21,14 +21,14 @@ /* crypto_mb name & version */ #define MBX_LIB_NAME() "crypto_mb" -#define MBX_VER_MAJOR 1 +#define MBX_VER_MAJOR 2 #define MBX_VER_MINOR 0 -#define MBX_VER_REV 13 +#define MBX_VER_REV 0 /* major interface version */ -#define MBX_INTERFACE_VERSION_MAJOR 11 +#define MBX_INTERFACE_VERSION_MAJOR 12 /* minor interface version */ -#define MBX_INTERFACE_VERSION_MINOR 14 +#define MBX_INTERFACE_VERSION_MINOR 0 typedef struct { int major; /* e.g. 1 */ diff --git a/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h b/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h index d154d244..7f0d254b 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h +++ b/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h @@ -29,6 +29,12 @@ #define MBX_BUILD() 1043 #define MBX_VERSION() MBX_BASE_VERSION(),MBX_BUILD() +#ifndef STR_MBX_BASE_VERSION +#define STR_MBX_BASE_VERSION() STR(MBX_VER_MAJOR) "," STR(MBX_VER_MINOR) "," STR(MBX_VER_REV) +#endif + +#define STR_FILE_MBX_VERSION() STR_MBX_BASE_VERSION()"," STR(MBX_BUILD()) + #ifndef MBX_STR_VERSION #define MBX_STR_VERSION() STR(MBX_VER_MAJOR) "." STR(MBX_VER_MINOR) "." STR(MBX_VER_REV) " (" STR(MBX_INTERFACE_VERSION_MAJOR) "." 
STR(MBX_INTERFACE_VERSION_MINOR) ")" #endif diff --git a/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h b/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h index 19b966b2..3614e017 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h +++ b/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h @@ -34,30 +34,30 @@ #define SIMD_BYTES (SIMD_LEN/8) #define MB_WIDTH (SIMD_LEN/64) - __INLINE U64 loadu64(const void *p) { + __MBX_INLINE U64 loadu64(const void *p) { return _mm512_loadu_si512((U64*)p); } - __INLINE U64 loadstream64(const void *p) { + __MBX_INLINE U64 loadstream64(const void *p) { return _mm512_stream_load_si512 ((U64*)p); } - __INLINE void storeu64(const void *p, U64 v) { + __MBX_INLINE void storeu64(const void *p, U64 v) { _mm512_storeu_si512((U64*)p, v); } #define mask_mov64 _mm512_mask_mov_epi64 #define set64 _mm512_set1_epi64 - __INLINE U64 fma52lo(U64 a, U64 b, U64 c) { + __MBX_INLINE U64 fma52lo(U64 a, U64 b, U64 c) { return _mm512_madd52lo_epu64(a, b, c); } - __INLINE U64 fma52hi(U64 a, U64 b, U64 c) { + __MBX_INLINE U64 fma52hi(U64 a, U64 b, U64 c) { return _mm512_madd52hi_epu64(a, b, c); } - __INLINE U64 mul52lo(U64 b, U64 c) { + __MBX_INLINE U64 mul52lo(U64 b, U64 c) { return _mm512_madd52lo_epu64(_mm512_setzero_si512(), b, c); } @@ -73,7 +73,7 @@ __asm__ ( "vpmadd52huq " #o "(%2), %1, %0" : "+x" (r): "x" (b), "r" (c) ); \ } - __INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { + __MBX_INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { __asm__("vmovdqu64 %2, %%zmm0 \n" "vpblendmq %%zmm0, %0, %0 %{%1%} \n" : "+v"(v) @@ -81,9 +81,9 @@ : "zmm0"); return v; } - + #else - // Use IFMA instrinsics for all other compilers + // Use IFMA intrinsics for all other compilers #define _mm512_madd52lo_epu64_(r, a, b, c, o) {\ r=fma52lo(a, b, _mm512_loadu_si512((U64*)(((char*)c)+o))); \ } @@ -93,48 +93,48 @@ } #pragma optimize("", off) - __INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { + __MBX_INLINE U64 
select64(__mb_mask k, U64 v, U64 *d) { return _mm512_mask_blend_epi64(k, v, _mm512_load_si512(d)); } - + #pragma optimize("", on) #endif #define fma52lo_mem(r, a, b, c, o) _mm512_madd52lo_epu64_(r, a, b, c, o) // gres #define fma52hi_mem(r, a, b, c, o) _mm512_madd52hi_epu64_(r, a, b, c, o) // gres - __INLINE U64 add64(U64 a, U64 b) { + __MBX_INLINE U64 add64(U64 a, U64 b) { return _mm512_add_epi64(a, b); } - __INLINE U64 sub64(U64 a, U64 b) { + __MBX_INLINE U64 sub64(U64 a, U64 b) { return _mm512_sub_epi64(a, b); } - __INLINE U64 get_zero64() { + __MBX_INLINE U64 get_zero64() { return _mm512_setzero_si512(); } - __INLINE void set_zero64(U64 *a) { + __MBX_INLINE void set_zero64(U64 *a) { *a = _mm512_xor_si512(*a, *a); } - __INLINE U64 set1(unsigned long long a) { + __MBX_INLINE U64 set1(unsigned long long a) { return _mm512_set1_epi64((long long)a); } - __INLINE U64 srli64(U64 a, int s) { + __MBX_INLINE U64 srli64(U64 a, int s) { return _mm512_srli_epi64(a, s); } #define srai64 _mm512_srai_epi64 #define slli64 _mm512_slli_epi64 - __INLINE U64 and64_const(U64 a, unsigned long long mask) { + __MBX_INLINE U64 and64_const(U64 a, unsigned long long mask) { return _mm512_and_epi64(a, _mm512_set1_epi64((long long)mask)); } - __INLINE U64 and64(U64 a, U64 mask) { + __MBX_INLINE U64 and64(U64 a, U64 mask) { return _mm512_and_epi64(a, mask); } @@ -150,7 +150,7 @@ #define mask_sub64 _mm512_mask_sub_epi64 #define maskz_sub64 _mm512_maskz_sub_epi64 - __INLINE __mb_mask is_zero(U64* p, int len) { + __MBX_INLINE __mb_mask is_zero(U64* p, int len) { U64 Z = p[0]; for(int i = 1; i < len; i++) { Z = or64(Z, p[i]); @@ -164,7 +164,7 @@ #else #define mask_xor _kxor_mask8 #endif - + #define get_mask(a) (a) #define get_mask_value(a) (a) @@ -196,7 +196,7 @@ X5_ = _mm512_mask_shuffle_i64x2(X45H, 0b11001111, X0123H, X67H, 0b10001000 ); \ X7_ = _mm512_mask_shuffle_i64x2(X67H, 0b00111111, X0123H, X45H, 0b10111101 ); \ } - + #else #error "Incorrect SIMD length" #endif // SIMD_LEN diff --git 
a/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h b/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h index 8695e0f4..6dfa9db5 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h +++ b/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h @@ -17,7 +17,7 @@ /* * Auxiliary functions to set and copy memory */ -__INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) +__MBX_INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) { const int8u* s = (int8u*)pSrc; int8u* d = (int8u*)pDst; @@ -26,7 +26,7 @@ __INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) d[k] = s[k]; } -__INLINE void PadBlock(int8u paddingByte, void* pDst, int numBytes) +__MBX_INLINE void PadBlock(int8u paddingByte, void* pDst, int numBytes) { int8u* d = (int8u*)pDst; int k; diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h index 028ce94a..ca3d26ba 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h @@ -49,23 +49,23 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE256_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE256_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE256_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE256_)(const U64 T[]) { U64 Z = or64(or64(T[0], T[1]), or64(or64(T[2], T[3]), T[4])); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = 
a[1]; @@ -75,7 +75,8 @@ __INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -84,7 +85,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, R[4] = mask_mov64(B[4], k, A[4]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -94,7 +95,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k } /* compare two FE */ -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -113,7 +114,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) return cmp64_mask(r4, get_zero64(), _MM_CMPINT_LT); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) { __ALIGN64 U64 msg[P256_LEN52]; @@ -122,7 +123,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) msg[2] = xor64(A[2], B[2]); msg[3] = xor64(A[3], B[3]); msg[4] = xor64(A[4], B[4]); - + return MB_FUNC_NAME(is_zero_FE256_)(msg); } diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h index 7414ccc8..2f043a09 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h +++ 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h @@ -53,24 +53,24 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE384_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE384_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = T[5] = T[6] = T[7] = get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE384_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE384_)(const U64 T[]) { //U64 Z = or64(or64(or64(or64(or64(or64(or64(T[0], T[1]), T[2]), T[3]), T[4]), T[5]), T[6]), T[7]); U64 Z = or64(or64(or64(T[0], T[1]), or64(T[2], T[3])), or64(or64(T[4], T[5]), or64(T[6], T[7]))); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = a[1]; @@ -83,7 +83,8 @@ __INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? 
A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -95,7 +96,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, R[7] = mask_mov64(B[7], k, A[7]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -107,7 +108,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k R[7] = select64(k, B[7], (U64*)(&A[7])); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -134,7 +135,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) } /* compare two FE */ -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE384_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE384_)(const U64 A[], const U64 B[]) { U64 T[P384_LEN52]; T[0] = xor64(A[0], B[0]); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h index 3caee065..1426a982 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h @@ -56,23 +56,23 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE521_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE521_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = T[5] = T[6] = T[7] = T[8] = T[9] = T[10] = 
get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE521_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE521_)(const U64 T[]) { U64 Z = or64(or64(or64(or64(T[0], T[1]), or64(T[2], T[3])), or64(or64(T[4], T[5]), or64(T[6], T[7]))), or64(or64(T[8], T[9]), T[10])); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = a[1]; @@ -88,7 +88,8 @@ __INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -103,7 +104,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, R[10]= mask_mov64(B[10],k, A[10]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -118,7 +119,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k R[10]= select64(k,B[10], (U64*)(&A[10])); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -149,7 +150,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) return cmp64_mask(r10, 
get_zero64(), _MM_CMPINT_LT); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE521_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE521_)(const U64 A[], const U64 B[]) { U64 T[P521_LEN52]; diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h index 4b5f37e7..99d8ff3e 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE256_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -76,7 +76,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P256_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P256_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h index 9d224750..180e497d 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE384_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = r->X[5] = r->X[6] = r->X[7] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = r->Y[5] = r->Y[6] = r->Y[7] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -85,7 +85,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P384_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P384_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = r->x[5] = r->x[6] = r->x[7] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = r->y[5] = r->y[6] = r->y[7] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h index c6a3d71e..66b5eb0a 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h +++ 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE521_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = r->X[5] = r->X[6] = r->X[7] = r->X[8] = r->X[9] = r->X[10] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = r->Y[5] = r->Y[6] = r->Y[7] = r->Y[8] = r->Y[9] = r->Y[10] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -94,7 +94,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P521_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P521_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = r->x[5] = r->x[6] = r->x[7] = r->x[8] = r->x[9] = r->x[10] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = r->y[5] = r->y[6] = r->y[7] = r->y[8] = r->y[9] = r->y[10] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h index a56ad98d..f28aa980 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h @@ -72,7 +72,7 @@ 
typedef struct ge52_cached_mb_t { */ /* ext => homo */ -__INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) +__MBX_INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) { fe52_copy_mb(r->X, p->X); fe52_copy_mb(r->Y, p->Y); @@ -80,7 +80,7 @@ __INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) } /* p1p1 => homo */ -__INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) +__MBX_INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) { fe52_mul(r->X, p->X, p->T); fe52_mul(r->Y, p->Y, p->Z); @@ -88,7 +88,7 @@ __INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) } /* p1p1 => ext */ -__INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) +__MBX_INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) { fe52_mul(r->X, p->X, p->T); fe52_mul(r->Y, p->Y, p->Z); @@ -98,26 +98,26 @@ __INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) /* set GE to neutral */ -__INLINE void neutral_ge52_homo_mb(ge52_homo_mb* ge) +__MBX_INLINE void neutral_ge52_homo_mb(ge52_homo_mb* ge) { fe52_0_mb(ge->X); fe52_1_mb(ge->Y); fe52_1_mb(ge->Z); } -__INLINE void neutral_ge52_ext_mb(ge52_ext_mb* ge) +__MBX_INLINE void neutral_ge52_ext_mb(ge52_ext_mb* ge) { fe52_0_mb(ge->X); fe52_1_mb(ge->Y); fe52_0_mb(ge->T); fe52_1_mb(ge->Z); } -__INLINE void neutral_ge52_precomp_mb(ge52_precomp_mb *ge) +__MBX_INLINE void neutral_ge52_precomp_mb(ge52_precomp_mb *ge) { fe52_1_mb(ge->ysubx); fe52_1_mb(ge->yaddx); fe52_0_mb(ge->t2d); } -__INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) +__MBX_INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) { fe52_1_mb(ge->YsubX); fe52_1_mb(ge->YaddX); @@ -126,19 +126,19 @@ __INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) } /* move GE under mask (conditionally): r = k? 
a : b */ -__INLINE void ge52_cmov1_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp* a) +__MBX_INLINE void ge52_cmov1_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp* a) { fe52_cmov1_mb(r->ysubx, b->ysubx, k, a->ysubx); fe52_cmov1_mb(r->yaddx, b->yaddx, k, a->yaddx); fe52_cmov1_mb(r->t2d, b->t2d, k, a->t2d); } -__INLINE void cmov_ge52_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp_mb* a) +__MBX_INLINE void cmov_ge52_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp_mb* a) { fe52_cmov_mb(r->ysubx, b->ysubx, k, a->ysubx); fe52_cmov_mb(r->yaddx, b->yaddx, k, a->yaddx); fe52_cmov_mb(r->t2d, b->t2d, k, a->t2d); } -__INLINE void cmov_ge52_cached_mb(ge52_cached_mb* r, const ge52_cached_mb* b, __mb_mask k, const ge52_cached_mb* a) +__MBX_INLINE void cmov_ge52_cached_mb(ge52_cached_mb* r, const ge52_cached_mb* b, __mb_mask k, const ge52_cached_mb* a) { fe52_cmov_mb(r->YsubX, b->YsubX, k, a->YsubX); fe52_cmov_mb(r->YaddX, b->YaddX, k, a->YaddX); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h index e8da4be2..10d7f9a5 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h @@ -47,19 +47,19 @@ typedef U64 fe52_mb[FE_LEN52]; /* set FE to zero */ -__INLINE void fe52_0_mb(fe52_mb fe) +__MBX_INLINE void fe52_0_mb(fe52_mb fe) { fe[0] = fe[1] = fe[2] = fe[3] = fe[4] = get_zero64(); } /* set FE to 1 */ -__INLINE void fe52_1_mb(fe52_mb fe) +__MBX_INLINE void fe52_1_mb(fe52_mb fe) { fe[0] = set1(1LL); fe[1] = fe[2] = fe[3] = fe[4] = get_zero64(); } /* copy FE */ -__INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) +__MBX_INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) { r[0] = a[0]; r[1] = a[1]; @@ -69,7 +69,7 @@ 
__INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) } /* convert fe52_mb => fe64_mb */ -__INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) +__MBX_INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) { r[0] = xor64(slli64(a[1],52), a[0]); r[1] = xor64(slli64(a[2],40), srli64(a[1],12)); @@ -78,14 +78,14 @@ __INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) } /* check if FE is zero */ -__INLINE __mb_mask fe52_mb_is_zero(const fe52_mb a) +__MBX_INLINE __mb_mask fe52_mb_is_zero(const fe52_mb a) { U64 t = or64(or64(a[0], a[1]), or64(or64(a[2], a[3]), a[4])); return cmpeq64_mask(t, get_zero64()); } /* check if a==b */ -__INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) +__MBX_INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) { __ALIGN64 fe52_mb t; t[0] = xor64(a[0], b[0]); @@ -97,7 +97,7 @@ __INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) } /* move FE under mask (conditionally): r = k? a : b */ -__INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 a) +__MBX_INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 a) { r[0] = mask_mov64(b[0], k, set1(a[0])); r[1] = mask_mov64(b[1], k, set1(a[1])); @@ -105,7 +105,8 @@ __INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 r[3] = mask_mov64(b[3], k, set1(a[3])); r[4] = mask_mov64(b[4], k, set1(a[4])); } -__INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_mb a) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_mb a) { r[0] = mask_mov64(b[0], k, a[0]); r[1] = mask_mov64(b[1], k, a[1]); @@ -115,13 +116,13 @@ __INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_m } /* swap FE under mask (conditionally): r = k? 
a : b */ -__INLINE void cswap_U64(U64* x, __mb_mask k, U64* y) +__MBX_INLINE void cswap_U64(U64* x, __mb_mask k, U64* y) { *x = _mm512_mask_xor_epi64(*x, k, *x, *y); *y = _mm512_mask_xor_epi64(*y, k, *y, *x); *x = _mm512_mask_xor_epi64(*x, k, *x, *y); } -__INLINE void fe52_cswap_mb(fe52_mb a, __mb_mask k, fe52_mb b) +__MBX_INLINE void fe52_cswap_mb(fe52_mb a, __mb_mask k, fe52_mb b) { cswap_U64(&a[0], k, &b[0]); cswap_U64(&a[1], k, &b[1]); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h b/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h index 8c4bfc4a..795c253d 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h @@ -67,7 +67,7 @@ static __ALIGN64 const int64u sha512_cnt[] = { #define LSR32(x,nBits) ((x)>>(nBits)) #define LSL32(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of WORD */ +/* Rotate (right and left) of WORD */ #if defined(_MSC_VER) && !defined( __ICL ) # include # define ROR32(x, nBits) _lrotr((x),(nBits)) @@ -81,7 +81,7 @@ static __ALIGN64 const int64u sha512_cnt[] = { #define LSR64(x,nBits) ((x)>>(nBits)) #define LSL64(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of DWORD */ +/* Rotate (right and left) of DWORD */ #define ROR64(x, nBits) (LSR64((x),(nBits)) | LSL64((x),64-(nBits))) #define ROL64(x, nBits) ROR64((x),(64-(nBits))) diff --git a/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h b/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h index d774e317..a4f36e80 100644 --- a/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h +++ b/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h @@ -19,7 +19,7 @@ #include -/* exponetiation processing window */ +/* exponentiation processing window */ #define EXP_WIN_SIZE (5) #define EXP_WIN_MASK ((1<X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = get_zero64(); @@ -50,7 +50,7 @@ 
__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(SM2_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -74,7 +74,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(SM2_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(SM2_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h b/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h index 1dbe59fc..f0b0f408 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h +++ b/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h @@ -46,7 +46,7 @@ #define HASH_BUFF(ctx) ((ctx)->msg_buffer) /* -// constants +// constants */ static const int32u sm3_iv[] = { 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, @@ -65,11 +65,11 @@ __ALIGN64 static const int32u tj_calculated[] = { 0x79CC4519,0xF3988A32,0xE73114 0x879D8A7A,0x0F3B14F5,0x1E7629EA,0x3CEC53D4,0x79D8A7A8,0xF3B14F50,0xE7629EA1,0xCEC53D43 }; /* -// internal functions +// internal functions */ -__INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) +__MBX_INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) { int8u* d = (int8u*)dst_p; int k; @@ -77,7 +77,7 @@ __INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) d[k] = padding_byte; } -__INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3, +__MBX_INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3, __m256i *v4, __m256i *v5, __m256i *v6, __m256i *v7) { __m256i w0, w1, w2, w3, 
w4, w5, w6, w7; @@ -134,7 +134,7 @@ __INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i * *v7 = _mm256_permute2x128_si256(x3, x7, 0b110001); } -__INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], __mmask16 mb_mask) { +__MBX_INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], __mmask16 mb_mask) { __m256i v0 = _mm256_loadu_si256((__m256i*)inp[0]); __m256i v1 = _mm256_loadu_si256((__m256i*)inp[1]); __m256i v2 = _mm256_loadu_si256((__m256i*)inp[2]); @@ -158,7 +158,7 @@ __INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], } -__INLINE void TRANSPOSE_8X16_I32(int32u* out[16], const int32u* const inp[8], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_8X16_I32(int32u* out[16], const int32u* const inp[8], __mmask16 mb_mask) { __m256i v0 = _mm256_loadu_si256((__m256i*)inp[0]); __m256i v1 = _mm256_loadu_si256((__m256i*)inp[1]); __m256i v2 = _mm256_loadu_si256((__m256i*)inp[2]); diff --git a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h index e1d18aca..e826715a 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h +++ b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h @@ -22,12 +22,12 @@ #define SM4_GCM_MB_H ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/* -// Constant from NIST Special Publication 800-38D +/* +// Constant from NIST Special Publication 800-38D // (Recommendation for GCMmode, p.5.2.1.1 Input Data) // len(P) <= 2^39-256 bits */ -static const int64u MAX_TXT_LEN = ((int64u)1 << 36) - 32; // length in bytes +static const int64u MAX_TXT_LEN = ((int64u)1 << 36) - 32; // length in bytes /* // Internal functions @@ -167,7 +167,7 @@ static const int rearrangeOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, to[14] = from[11]; \ to[15] = from[15]; -__INLINE __m512i inc_block32(__m512i x, 
const int8u *increment) { return mask_add_epi32(x, 0x1111, x, M512(increment)); } +__MBX_INLINE __m512i inc_block32(__m512i x, const int8u *increment) { return mask_add_epi32(x, 0x1111, x, M512(increment)); } static __ALIGN64 const int8u initialInc[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; diff --git a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h index 99cd25a2..d080e03f 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h +++ b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h @@ -320,25 +320,25 @@ EXTERN_C void sm4_xts_kernel_mb16(int8u* pa_out[SM4_LINES], const int8u* pa_inp[ const int8u* pa_tweak[SM4_LINES], __mmask16 mb_mask, const int dir); // The transformation based on SM4 sbox algebraic structure, parameters were computed manually -__INLINE __m512i sBox512(__m512i block) +__MBX_INLINE __m512i sBox512(__m512i block) { block = _mm512_gf2p8affine_epi64_epi8(block, M512(affineIn), 0x65); block = _mm512_gf2p8affineinv_epi64_epi8(block, M512(affineOut), 0xd3); return block; } -__INLINE __m512i Lblock512(__m512i x) +__MBX_INLINE __m512i Lblock512(__m512i x) { return _mm512_ternarylogic_epi32(_mm512_xor_si512(_mm512_rol_epi32(x, 2), _mm512_rol_epi32(x, 10)), _mm512_rol_epi32(x, 18), _mm512_shuffle_epi8 (x, _mm512_loadu_si512(shuf8)), 0x96); } -__INLINE __m512i Lkey512(__m512i x) +__MBX_INLINE __m512i Lkey512(__m512i x) { return _mm512_xor_epi32(_mm512_rol_epi32(x, 13), _mm512_rol_epi32(x, 23)); } -__INLINE __m512i IncBlock512(__m512i x, const int8u* increment) +__MBX_INLINE __m512i IncBlock512(__m512i x, const int8u* increment) { __m512i t = _mm512_add_epi64(x, M512(increment)); __mmask8 carryMask = _mm512_cmplt_epu64_mask(t, x); @@ -472,7 +472,7 @@ __INLINE __m512i IncBlock512(__m512i x, const int8u* increment) \ 
T0=K0,T1=K1,T2=K2,T3=K3 -__INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const int8u* p_inp[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const int8u* p_inp[16], __mmask16 mb_mask) { __mmask16 loc_mb_mask = mb_mask; // L0 - L3 @@ -510,7 +510,7 @@ __INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __ *t3 = _mm512_unpackhi_epi64(z1, z3); } -__INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const __m128i in[16]) { +__MBX_INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const __m128i in[16]) { // L0 - L3 __m512i z0 = _mm512_castsi128_si512(in[0]); __m512i z1 = _mm512_castsi128_si512(in[1]); @@ -546,7 +546,7 @@ __INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2 *t3 = _mm512_unpackhi_epi64(z1, z3); } -__INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], __mmask16 mb_mask) { #define STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi32(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -591,7 +591,7 @@ __INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __ } -__INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i out[16]) { +__MBX_INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i out[16]) { __m512i z0 = _mm512_unpacklo_epi32(*t0, *t1); __m512i z1 = _mm512_unpackhi_epi32(*t0, *t1); @@ -630,7 +630,7 @@ __INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2 } -__INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i 
p_out[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i p_out[16], __mmask16 mb_mask) { #define STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi32(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -675,7 +675,7 @@ __INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t } -__INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], int* p_loc_len, __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], int* p_loc_len, __mmask16 mb_mask) { #define STORE_RESULT_EPI8(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi8(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -737,7 +737,7 @@ __INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512 STORE_RESULT_EPI8((__m128i*)p_out[15] - 3, stream_mask << 48, loc_mb_mask, t3); } -__INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], const int8u* p_iv[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], const int8u* p_iv[16], __mmask16 mb_mask) { #define XOR_AND_STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti, IV, TMP) \ TMP = _mm512_maskz_loadu_epi32(store_mask * (0x1&loc_mb_mask), IV); \ @@ -787,7 +787,7 @@ __INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i XOR_AND_STORE_RESULT((__m128i*)p_out[15] - 3, 0xF000, loc_mb_mask, *t3, (__m128i*)p_iv[15] - 3, z3); } -__INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], const int8u* p_iv[16], int* p_loc_len, __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], const int8u* p_iv[16], int* 
p_loc_len, __mmask16 mb_mask) { #define XOR_AND_STORE_RESULT_EPI8(OUT, store_mask, loc_mb_mask, Ti, IV, TMP) \ TMP = _mm512_maskz_loadu_epi8(store_mask * (0x1&loc_mb_mask), IV); \ diff --git a/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake b/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake new file mode 100644 index 00000000..312f7466 --- /dev/null +++ b/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake @@ -0,0 +1,94 @@ +#=============================================================================== +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +#=============================================================================== + +# Security Linker flags + +set(LINK_FLAG_SECURITY "") + +# Specifies whether to generate an executable image that can be randomly rebased at load time. +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /DYNAMICBASE") +# This option modifies the header of an executable image, a .dll file or .exe file, to indicate whether ASLR with 64-bit addresses is supported. +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /HIGHENTROPYVA") +# The /LARGEADDRESSAWARE option tells the linker that the application can handle addresses larger than 2 gigabytes. 
+set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /LARGEADDRESSAWARE") +# Indicates that an executable is compatible with the Windows Data Execution Prevention (DEP) feature +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /NXCOMPAT") + +# Security Compiler flags + +set(CMAKE_C_FLAGS_SECURITY "") +# Detect some buffer overruns. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /GS") +# Warning level = 3 +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /W3") +# Changes all warnings to errors. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /WX") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} -fcf-protection:full") +# Changes all warnings to errors. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /WX") + +# Linker flags + +# Add export files +if(MBX_FIPS_MODE) + set(LINK_FLAGS_DYNAMIC "/DEF:${CRYPTO_MB_SOURCES_DIR}/cmake/dll_export/crypto_mb_fips_selftests.defs") +else() + set(LINK_FLAGS_DYNAMIC "/DEF:${CRYPTO_MB_SOURCES_DIR}/cmake/dll_export/crypto_mb.defs") +endif() + +# Disables linking to Intel® libraries +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /Qno-intel-lib") + +# Compiler flags + +# Tells the compiler to align functions and loops +# set(CMAKE_C_FLAGS "/Qfnalign:32 /Qalign-loops:32") +# Suppress warning #10120: overriding '/O2' with '/O3' +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd10120 -Wno-unused-command-line-argument -Wno-unused-parameter -Wno-pointer-sign -Wno-sign-compare -Wno-static-in-inline /Qno-intel-lib") +# Ensures that compilation takes place in a freestanding environment +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qfreestanding") + +if(CODE_COVERAGE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qprof-gen:srcpos /Qprof-dir:${PROF_DATA_DIR}") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}") + +# Tells the compiler to conform to a specific language standard. 
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99") + +# Causes the application to use the multithread, static version of the run-time library +set(CMAKE_C_FLAGS_RELEASE "/MT") +# Optimization level = 3 +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /O3") +# No-debug macro +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /DNDEBUG") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + +# Causes the application to use the multithread, static version of the run-time library (debug version). +set(CMAKE_C_FLAGS_DEBUG "/MTd") +# The /Zi option produces a separate PDB file that contains all the symbolic debugging information for use with the debugger. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Zi") +# Turns off all optimizations. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Od") +# Debug macro +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /D_DEBUG") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") + +# Optimisation dependent flags +set(AVX512_CFLAGS "-march=icelake-server -mavx512dq -mavx512ifma -mavx512f -mavx512vbmi2 -mavx512cd -mavx512bw -mbmi2") diff --git a/sources/ippcp/crypto_mb/src/common/cpu_features.c b/sources/ippcp/crypto_mb/src/common/cpu_features.c index ead6995b..d3e0fb53 100644 --- a/sources/ippcp/crypto_mb/src/common/cpu_features.c +++ b/sources/ippcp/crypto_mb/src/common/cpu_features.c @@ -14,6 +14,9 @@ * limitations under the License. 
*************************************************************************/ +#if defined( _WIN32 ) || defined( _WIN64 ) +#include +#endif #include #include @@ -58,7 +61,7 @@ #define edx_ (3) -__INLINE void _mbcp_cpuid(int32u buf[4], int32u leaf, int32u subleaf) +__MBX_INLINE void _mbcp_cpuid(int32u buf[4], int32u leaf, int32u subleaf) { #ifdef __GNUC__ __asm__ ("cpuid" : "=a" (buf[0]), "=b" (buf[1]), "=c" (buf[2]), "=d" (buf[3]) : "a" (leaf), "c" (subleaf)); diff --git a/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen b/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen index 3fc35cb9..5d918721 100644 --- a/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen +++ b/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen @@ -38,7 +38,7 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "CompanyName", "Intel Corporation.\0" - VALUE "FileVersion", STR( MBX_VERSION() ) "\0" + VALUE "FileVersion", STR_FILE_MBX_VERSION() "\0" VALUE "ProductName", MBX_LIB_SHORTNAME() ". Intel(R) Integrated Performance Primitives. " MBX_LIB_LONGNAME() ".\0" VALUE "ProductVersion", CRYPTO_MB_STR_VERSION() "\0" VALUE "LegalCopyright", "Copyright (C) 1999-2021, Intel Corporation. All rights reserved.\0" diff --git a/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c b/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c index e6db178c..8898ef79 100644 --- a/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c +++ b/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c @@ -33,11 +33,11 @@ #define MIN(a, b) ( ((a) < (b)) ? a : b ) -__INLINE __mmask8 MB_MASK(int L) { +__MBX_INLINE __mmask8 MB_MASK(int L) { return (L > 0) ? 
(__mmask8)0xFF : (__mmask8)0; } -__INLINE __mmask64 SB_MASK1(int L, int REV) +__MBX_INLINE __mmask64 SB_MASK1(int L, int REV) { if (L <= 0) return (__mmask64)0x0; @@ -65,7 +65,7 @@ __INLINE __mmask64 SB_MASK1(int L, int REV) // - 8 hex strings -> mb8 */ DISABLE_OPTIMIZATION -__INLINE void transform_8sb_to_mb8(U64 out_mb8[], int bitLen, int8u *inp[8], int inpLen[8], int flag) { +__MBX_INLINE void transform_8sb_to_mb8(U64 out_mb8[], int bitLen, int8u *inp[8], int inpLen[8], int flag) { // inverse bytes (reverse=1) const __m512i bswap_mask = _mm512_set_epi64( 0x0001020304050607, 0x08090a0b0c0d0e0f, @@ -254,7 +254,7 @@ int8u ifma_HexStr8_to_mb8(int64u out_mb8[][8], const int8u* const pStr[8], int b // - mb8 -> 8 hex strings */ DISABLE_OPTIMIZATION -__INLINE void transform_mb8_to_8sb(int8u* out[8], int outLen[8], const U64 inp_mb8[], int bitLen, int flag) +__MBX_INLINE void transform_mb8_to_8sb(int8u* out[8], int outLen[8], const U64 inp_mb8[], int bitLen, int flag) { // inverse bytes (reverse=1) const __m512i bswap_mask = _mm512_set_epi64( diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c index 3752da68..38a5de23 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c @@ -310,7 +310,7 @@ void MB_FUNC_NAME(ifma_frommont52_n384_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_n384_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) { fe52_sqr(r,base); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c index c92a2d6e..37b3b6f0 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c @@ -237,7 +237,7 @@ void MB_FUNC_NAME(ifma_ams52_n521_)(U64 r[], const U64 va[]) U64 r20, 
r21; U64 u; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19 = r20 = r21 = get_zero64(); // full square @@ -399,7 +399,7 @@ void MB_FUNC_NAME(ifma_frommont52_n521_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_n521_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) { fe52_sqr(r,base); @@ -504,7 +504,7 @@ void MB_FUNC_NAME(ifma_aminv52_n521_)(U64 r[], const U64 z[]) Specialized single operations over n521 add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c index 20391df5..57232dc5 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c @@ -332,7 +332,7 @@ void MB_FUNC_NAME(ifma_frommont52_p384_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_p384_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) MB_FUNC_NAME(mov_FE384_)(r, base); @@ -402,7 +402,7 @@ void MB_FUNC_NAME(ifma_aminv52_p384_)(U64 r[], const U64 z[]) Specialized single operations over p384: add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask 
MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c index 28c7979a..ea6896f9 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c @@ -99,7 +99,7 @@ void MB_FUNC_NAME(ifma_amm52_p521_)(U64 r[], const U64 va[], const U64 vb[]) U64 r10, r11, r12, r13, r14, r15, r16, r17, r18, r19; U64 r20, r21; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19= r20 = r21 = get_zero64(); @@ -282,7 +282,7 @@ void MB_FUNC_NAME(ifma_ams52_p521_)(U64 r[], const U64 va[]) U64 r10, r11, r12, r13, r14, r15, r16, r17, r18, r19; U64 r20, r21; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19 = r20 = r21 = get_zero64(); // full square @@ -486,7 +486,7 @@ void MB_FUNC_NAME(ifma_frommont52_p521_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_p521_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) MB_FUNC_NAME(mov_FE521_)(r, base); @@ -574,7 +574,7 @@ void MB_FUNC_NAME(ifma_aminv52_p521_)(U64 r[], const U64 z[]) Specialized single operations over p521: add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return 
cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c index 184fa35a..43a982fe 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c @@ -57,7 +57,7 @@ static void nistp256_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n256 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp256_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -180,7 +180,7 @@ static __mb_mask nistp256_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ mbx_status mbx_nistp256_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch 
buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp256_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -436,7 +436,7 @@ mbx_status mbx_nistp256_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE256_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE256_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P256_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P256_BITSIZE); @@ -447,7 +447,7 @@ mbx_status mbx_nistp256_ecdsa_sign_mb8(int8u* pa_sign_r[8], /* // Verifies ECDSA signature -// pa_sign_r[] array of pointers to the computed r-components of the signatures +// pa_sign_r[] array of pointers to the computed r-components of the signatures // pa_sign_s[] array of pointers to the computed s-components of the signatures // pa_msg[] array of pointers to the messages are being signed // pa_pubx[] array of pointers to the public keys X-coordinates @@ -460,7 +460,7 @@ mbx_status mbx_nistp256_ecdsa_verify_mb8(const int8u* const pa_sign_r[8], const int8u* const pa_msg[8], const int64u* const pa_pubx[8], const int64u* const pa_puby[8], - const int64u* const pa_pubz[8], + const int64u* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -605,7 +605,7 @@ mbx_status mbx_nistp256_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], MB_FUNC_NAME(zero_)((int64u (*)[8])T, sizeof(T)/sizeof(U64)); return status; } - + nistp256_ecdsa_inv_keys_mb8(T, T, 0); /* store results in suitable format */ ifma_mb8_to_BN_256(pa_inv_skey, (const int64u (*)[8])T); @@ -643,7 +643,7 @@ mbx_status mbx_nistp256_ecdsa_sign_complete_ssl_mb8(int8u* 
pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -785,7 +785,7 @@ mbx_status mbx_nistp256_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P256_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P256_BITSIZE); @@ -802,7 +802,7 @@ mbx_status mbx_nistp256_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], - const BIGNUM* const pa_pubz[8], + const BIGNUM* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -840,7 +840,7 @@ mbx_status mbx_nistp256_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], { if(pa_sig[buf_no] != NULL) { - ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, + ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, (const BIGNUM(**))pa_sign_s + buf_no); } } diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c index 165f80ad..e0548c8e 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c @@ -57,7 +57,7 @@ static void nistp384_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n384 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp384_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -180,7 +180,7 @@ static __mb_mask 
nistp384_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ mbx_status mbx_nistp384_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp384_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -417,7 +417,7 @@ mbx_status mbx_nistp384_ecdsa_sign_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp384_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp384_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); 
@@ -435,7 +435,7 @@ mbx_status mbx_nistp384_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE384_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE384_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P384_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P384_BITSIZE); @@ -629,7 +629,7 @@ mbx_status mbx_nistp384_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], /* store results in suitable format */ ifma_mb8_to_BN_384(pa_sign_rp, (const int64u (*)[8])T); - + status |= MBX_SET_STS_BY_MASK(status, stt_mask, MBX_STATUS_SIGNATURE_ERR); return status; } @@ -647,7 +647,7 @@ mbx_status mbx_nistp384_ecdsa_sign_complete_ssl_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -775,7 +775,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp384_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp384_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -789,7 +789,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P384_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P384_BITSIZE); @@ -802,7 +802,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], } DLL_PUBLIC -mbx_status mbx_nistp384_ecdsa_verify_ssl_mb8(const 
ECDSA_SIG* const pa_sig[8], +mbx_status mbx_nistp384_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c index bc54ec89..77404111 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c @@ -57,7 +57,7 @@ static void nistp521_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n521 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp521_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -135,7 +135,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], /* h2 = sign_r * h */ MB_FUNC_NAME(ifma_tomont52_n521_)(h2, sign_r); MB_FUNC_NAME(ifma_amm52_n521_)(h2, h2, sign_s); - MB_FUNC_NAME(ifma_frommont52_n521_)(h2,h2); + MB_FUNC_NAME(ifma_frommont52_n521_)(h2,h2); int64u tmp[8][P521_LEN64]; int64u* pa_tmp[8] = {tmp[0], tmp[1], tmp[2], tmp[3], @@ -159,7 +159,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], // P != 0 __mb_mask signature_err_mask = MB_FUNC_NAME(is_zero_point_cordinate_)(P.Z); - + /* sign_r_restored = P.X mod n */ __ALIGN64 U64 sign_r_restored[P521_LEN52]; MB_FUNC_NAME(get_nistp521_ec_affine_coords_)(sign_r_restored, NULL, &P); @@ -180,7 +180,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ 
mbx_status mbx_nistp521_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp521_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -417,7 +417,7 @@ mbx_status mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp521_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp521_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -435,7 +435,7 @@ mbx_status mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE521_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE521_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P521_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P521_BITSIZE); @@ -446,7 +446,7 @@ mbx_status 
mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], /* // Verifies ECDSA signature -// pa_sign_r[] array of pointers to the computed r-components of the signatures +// pa_sign_r[] array of pointers to the computed r-components of the signatures // pa_sign_s[] array of pointers to the computed s-components of the signatures // pa_msg[] array of pointers to the messages are being signed // pa_pubx[] array of pointers to the public keys X-coordinates @@ -460,7 +460,7 @@ mbx_status mbx_nistp521_ecdsa_verify_mb8(const int8u* const pa_sign_r[8], const int8u* const pa_msg[8], const int64u* const pa_pubx[8], const int64u* const pa_puby[8], - const int64u* const pa_pubz[8], + const int64u* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -625,7 +625,7 @@ mbx_status mbx_nistp521_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], /* store results in suitable format */ ifma_mb8_to_BN_521(pa_sign_rp, (const int64u (*)[8])T); - + status |= MBX_SET_STS_BY_MASK(status, stt_mask, MBX_STATUS_SIGNATURE_ERR); return status; } @@ -643,7 +643,7 @@ mbx_status mbx_nistp521_ecdsa_sign_complete_ssl_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -771,7 +771,7 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp521_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp521_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -785,7 +785,7 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to 
strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P521_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P521_BITSIZE); @@ -799,10 +799,10 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], DLL_PUBLIC mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], - const int8u* const pa_msg[8], + const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], - const BIGNUM* const pa_pubz[8], + const BIGNUM* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -840,7 +840,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], { if(pa_sig[buf_no] != NULL) { - ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, + ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, (const BIGNUM(**))pa_sign_s + buf_no); } } @@ -858,7 +858,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_n521_)(sign_r), MBX_STATUS_MISMATCH_PARAM_ERR); status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_n521_)(sign_s), MBX_STATUS_MISMATCH_PARAM_ERR); - if(!MBX_IS_ANY_OK_STS(status)) + if(!MBX_IS_ANY_OK_STS(status)) return status; P521_POINT W; @@ -874,7 +874,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_p521_)(W.Y), MBX_STATUS_MISMATCH_PARAM_ERR); status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_p521_)(W.Z), MBX_STATUS_MISMATCH_PARAM_ERR); - if(!MBX_IS_ANY_OK_STS(status)) + if(!MBX_IS_ANY_OK_STS(status)) return status; __mb_mask signature_err_mask = nistp521_ecdsa_verify_mb8(sign_r,sign_s,msg, &W); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c index ecb00a06..0ef12318 100644 --- 
a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 : 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -330,7 +330,7 @@ static __NOINLINE void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -493,7 +493,7 @@ void MB_FUNC_NAME(ifma_ec_nistp256_mul_point_)(P256_POINT* r, const P256_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -509,7 +509,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P256_POINT_AFFINE* r, const SINGLE_P256_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P256_POINT_AFFINE* r, const SINGLE_P256_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c index 9ef01bff..a82d7ff8 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 : 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -329,7 +329,7 @@ static __NOINLINE 
void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -492,7 +492,7 @@ void MB_FUNC_NAME(ifma_ec_nistp384_mul_point_)(P384_POINT* r, const P384_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -508,7 +508,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SINGLE_P384_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SINGLE_P384_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); @@ -517,7 +517,7 @@ __INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SI U64 ay0, ay1, ay2, ay3, ay4, ay5, ay6, ay7; /* assume the point at infinity is what need */ - ax0 = ax1 = ax2 = ax3 = ax4 = ax5 = ax6 = ax7= + ax0 = ax1 = ax2 = ax3 = ax4 = ax5 = ax6 = ax7= ay0 = ay1 = ay2 = ay3 = ay4 = ay5 = ay6 = ay7 = get_zero64(); /* find out what we actually need or just keep original infinity */ diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c index ab22335d..e7fc8d90 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 
: 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -333,7 +333,7 @@ static __NOINLINE void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -496,7 +496,7 @@ void MB_FUNC_NAME(ifma_ec_nistp521_mul_point_)(P521_POINT* r, const P521_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -512,7 +512,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P521_POINT_AFFINE* r, const SINGLE_P521_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P521_POINT_AFFINE* r, const SINGLE_P521_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); diff --git a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c index 46a0bc72..c630d4cb 100644 --- a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c +++ b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c @@ -55,7 +55,7 @@ __ALIGN64 static const int64u ed25519_2_pm1_4[FE_LEN52][sizeof(U64) / sizeof(int }; /* ext => cached */ -__INLINE void ge_ext_to_cached_mb(ge52_cached_mb *r, const ge52_ext_mb* p) +__MBX_INLINE void ge_ext_to_cached_mb(ge52_cached_mb *r, const ge52_ext_mb* p) { fe52_add(r->YaddX, p->Y, p->X); fe52_sub(r->YsubX, p->Y, p->X); @@ -272,15 +272,15 @@ static void 
extract_precomputed_basepoint_dual(ge52_precomp_mb* p0, */ /* if msb set */ -__INLINE int32u isMsb_ct(int32u a) +__MBX_INLINE int32u isMsb_ct(int32u a) { return (int32u)0 - (a >> (sizeof(a) * 8 - 1)); } /* tests if a==0 */ -__INLINE int32u isZero(int32u a) +__MBX_INLINE int32u isZero(int32u a) { return isMsb_ct(~a & (a - 1)); } /* tests if a==b */ -__INLINE int32u isEqu(int32u a, int32u b) +__MBX_INLINE int32u isEqu(int32u a, int32u b) { return isZero(a ^ b); } void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) @@ -297,7 +297,7 @@ void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) __ALIGN64 ge52_p1p1_mb t; __ALIGN64 ge52_homo_mb s; - /* initial values are nuetral */ + /* initial values are neutral */ neutral_ge52_ext_mb(&r0); neutral_ge52_ext_mb(&r1); @@ -377,7 +377,7 @@ void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void booth_recode(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void booth_recode(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); diff --git a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c index 0c5b8fc4..f97183e3 100644 --- a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c +++ b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c @@ -105,7 +105,7 @@ void ifma52_sub_with_borrow(U64 r[], const U64 x[], const U64 y[]) #endif } -// r = x -__INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) +__MBX_INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) { __m512i r0 = _mm512_loadu_si512(inp[0]); __m512i r1 = _mm512_loadu_si512(inp[1]); @@ -36,11 +36,11 @@ __INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) __m512i r15 = _mm512_loadu_si512(inp[15]); // tansposition - __m512i t0 = _mm512_unpacklo_epi32(r0, r1); // 0 16 1 17 4 20 5 21 8 24 9 25 12 28 13 
29 + __m512i t0 = _mm512_unpacklo_epi32(r0, r1); // 0 16 1 17 4 20 5 21 8 24 9 25 12 28 13 29 __m512i t1 = _mm512_unpackhi_epi32(r0, r1); // 2 18 3 19 6 22 7 23 10 26 11 27 14 30 15 31 __m512i t2 = _mm512_unpacklo_epi32(r2, r3); // 32 48 33 49 ... __m512i t3 = _mm512_unpackhi_epi32(r2, r3); // 34 50 35 51 ... - __m512i t4 = _mm512_unpacklo_epi32(r4, r5); // 64 80 65 81 ... + __m512i t4 = _mm512_unpacklo_epi32(r4, r5); // 64 80 65 81 ... __m512i t5 = _mm512_unpackhi_epi32(r4, r5); // 66 82 67 83 ... __m512i t6 = _mm512_unpacklo_epi32(r6, r7); // 96 112 97 113 ... __m512i t7 = _mm512_unpackhi_epi32(r6, r7); // 98 114 99 115 ... @@ -57,15 +57,15 @@ __INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) r1 = _mm512_unpackhi_epi64(t0, t2); // 1 17 33 49 ... r2 = _mm512_unpacklo_epi64(t1, t3); // 2 18 34 49 ... r3 = _mm512_unpackhi_epi64(t1, t3); // 3 19 35 51 ... - r4 = _mm512_unpacklo_epi64(t4, t6); // 64 80 96 112 ... + r4 = _mm512_unpacklo_epi64(t4, t6); // 64 80 96 112 ... r5 = _mm512_unpackhi_epi64(t4, t6); // 65 81 97 114 ... r6 = _mm512_unpacklo_epi64(t5, t7); // 66 82 98 113 ... r7 = _mm512_unpackhi_epi64(t5, t7); // 67 83 99 115 ... - r8 = _mm512_unpacklo_epi64(t8, t10); // 128 144 160 176 ... + r8 = _mm512_unpacklo_epi64(t8, t10); // 128 144 160 176 ... r9 = _mm512_unpackhi_epi64(t8, t10); // 129 145 161 178 ... - r10 = _mm512_unpacklo_epi64(t9, t11); // 130 146 162 177 ... + r10 = _mm512_unpacklo_epi64(t9, t11); // 130 146 162 177 ... r11 = _mm512_unpackhi_epi64(t9, t11); // 131 147 163 179 ... - r12 = _mm512_unpacklo_epi64(t12, t14); // 192 208 228 240 ... + r12 = _mm512_unpacklo_epi64(t12, t14); // 192 208 228 240 ... r13 = _mm512_unpackhi_epi64(t12, t14); // 193 209 229 241 ... r14 = _mm512_unpacklo_epi64(t13, t15); // 194 210 230 242 ... r15 = _mm512_unpackhi_epi64(t13, t15); // 195 211 231 243 ... 
@@ -318,7 +318,7 @@ void sm3_avx512_mb16(int32u hash_pa[][16], const int8u* const msg_pa[16], int le _mm512_storeu_si512(hash_pa + 5, F); _mm512_storeu_si512(hash_pa + 6, G); _mm512_storeu_si512(hash_pa + 7, H); - + /* Update pointers to data, local lengths and mask */ _mm512_storeu_si512(loc_data, _mm512_mask_add_epi64(_mm512_set1_epi64((long long)&zero_buffer), (__mmask8)mb_mask, _mm512_loadu_si512(loc_data), _mm512_set1_epi64(SM3_MSG_BLOCK_SIZE))); _mm512_storeu_si512(loc_data + 8, _mm512_mask_add_epi64(_mm512_set1_epi64((long long)&zero_buffer), *((__mmask8*)&mb_mask + 1), _mm512_loadu_si512(loc_data+8), _mm512_set1_epi64(SM3_MSG_BLOCK_SIZE))); diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c index f786b9a4..84761678 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c @@ -23,7 +23,7 @@ // Implementation is the same with SM4-CTR */ -__INLINE __m128i IncBlock128(__m128i x, int32u increment) { return _mm_add_epi32(x, _mm_maskz_loadu_epi32(1, &increment)); } +__MBX_INLINE __m128i IncBlock128(__m128i x, int32u increment) { return _mm_add_epi32(x, _mm_maskz_loadu_epi32(1, &increment)); } static void sm4_gctr_mask_kernel_mb16(__m512i *CTR, const __m512i *p_rk, diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c index a5f3cfc2..e35f6cc3 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c @@ -17,7 +17,7 @@ #include #include -__INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __mmask16 load_mask) +__MBX_INLINE void read_first(__m512i *data_blocks[4], 
const int8u *const pa_input[SM4_LINES], __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; @@ -114,7 +114,7 @@ __INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM /* End of explicitly unrolled loop */ } -__INLINE void read_next(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], int block_number, __mmask16 load_mask) +__MBX_INLINE void read_next(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], int block_number, __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c index 268acd32..c225be1c 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c @@ -17,7 +17,7 @@ #include #include -__INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __m512i *input_len, __mmask16 load_mask) +__MBX_INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __m512i *input_len, __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; diff --git a/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c b/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c index 80ceeaa7..974fcd7f 100644 --- a/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c @@ -142,7 +142,7 @@ static void sm4_ctr128_mask_kernel_mb16(__m512i* CTR, const __m512i* p_rk, __m51 } -__INLINE __m128i IncBlock128(__m128i x, int32u increment) +__MBX_INLINE __m128i IncBlock128(__m128i x, int32u increment) { __m128i t = _mm_add_epi64(x, _mm_maskz_loadu_epi32(1, &increment)); __mmask8 
carryMask = _mm_cmplt_epu64_mask(t, x); diff --git a/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c b/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c index d11d9dc5..7364e7c4 100644 --- a/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c +++ b/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c @@ -54,7 +54,7 @@ __ALIGN64 static const int64u MOD_2_260_[8] = {19*32, 19*32, 19*32, 19*32, //////////////////////////////////////////////////////////// -__INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { +__MBX_INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; U64 *va = (U64*) a; @@ -83,7 +83,7 @@ __INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -116,18 +116,18 @@ __INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { } /* SQR -c=0 (0,0) -c=1 (0,1) -c=2 (0,2) (1,1) -c=3 (0,3) (1,2) -c=4 (0,4) (1,3) (2,2) -c=5 (1,4) (2,3) -c=6 (2,4) (3,3) -c=7 (3,4) +c=0 (0,0) +c=1 (0,1) +c=2 (0,2) (1,1) +c=3 (0,3) (1,2) +c=4 (0,4) (1,3) (2,2) +c=5 (1,4) (2,3) +c=6 (2,4) (3,3) +c=7 (3,4) c=8 (4,4) */ -__INLINE void ed25519_sqr(U64 out[], const U64 a[]) { +__MBX_INLINE void ed25519_sqr(U64 out[], const U64 a[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; U64 *va = (U64*) a; @@ -299,7 +299,7 @@ static const int64u VMASK52[8] = {MASK52, MASK52, MASK52, MASK52, R##0 = fma52lo(R##0, srli64(R##4, 47), MOD_2_255); \ R##4 = and64(R##4, loadu64(VMASK_R4)); -__INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], const U64 a0[], const U64 b0[], const U64 a1[], const U64 b1[]) { @@ -406,7 +406,7 @@ __INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], storeu64(&vr1[4], r14); } -__INLINE void ed25519_sqr_dual(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_sqr_dual(U64 out0[], U64 
out1[], const U64 a0[], const U64 a1[]) { U64 r00, r01, r02, r03, r04, r05, r06, r07, r08, r09; @@ -514,7 +514,7 @@ __INLINE void ed25519_sqr_dual(U64 out0[], U64 out1[], ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// -__INLINE void fe52mb8_set(U64 out[], int64u value) +__MBX_INLINE void fe52mb8_set(U64 out[], int64u value) { storeu64(&out[0], set64((long long)value)); storeu64(&out[1], get_zero64()); @@ -522,7 +522,7 @@ __INLINE void fe52mb8_set(U64 out[], int64u value) storeu64(&out[3], get_zero64()); storeu64(&out[4], get_zero64()); } -__INLINE void fe52mb8_copy(U64 out[], const U64 in[]) +__MBX_INLINE void fe52mb8_copy(U64 out[], const U64 in[]) { storeu64(&out[0], loadu64(&in[0])); storeu64(&out[1], loadu64(&in[1])); @@ -533,7 +533,7 @@ __INLINE void fe52mb8_copy(U64 out[], const U64 in[]) // Clang warning -Wunused-function #if(0) -__INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = get_zero64(); @@ -558,7 +558,7 @@ __INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -594,13 +594,13 @@ __INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) storeu64(&vr[4], r4); } -__INLINE void fe52mb8_sqr_mod25519(U64 out[], const U64 a[]) +__MBX_INLINE void fe52mb8_sqr_mod25519(U64 out[], const U64 a[]) { fe52mb8_mul_mod25519(out, a, a); } #endif -__INLINE void fe52mb8_mul121666_mod25519(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_mul121666_mod25519(U64 vr[], const U64 va[]) { U64 multiplier = set64(121666); @@ -647,20 +647,20 @@ __INLINE void fe52mb8_mul121666_mod25519(U64 vr[], 
const U64 va[]) // __ALIGN64 static const int64u prime25519[5] = { // PRIME25519_LO, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_HI}; -__ALIGN64 static const int64u VPRIME25519_LO[8] = - { PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, +__ALIGN64 static const int64u VPRIME25519_LO[8] = + { PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO }; -__ALIGN64 static const int64u VPRIME25519_MID[8] = - { PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, +__ALIGN64 static const int64u VPRIME25519_MID[8] = + { PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID }; -__ALIGN64 static const int64u VPRIME25519_HI[8] = - { PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, +__ALIGN64 static const int64u VPRIME25519_HI[8] = + { PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI }; -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } #define NORM_ASHIFTR(R, I, J) \ @@ -671,7 +671,7 @@ __INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) R##J = add64(R##J, srli64(R##I, DIGIT_SIZE)); \ R##I = and64(R##I, loadu64(VMASK52)); -__INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { /* r = a+b */ U64 r0 = add64(va[0], vb[0]); @@ -709,7 +709,7 @@ __INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) storeu64(&vr[4], cmov_U64(t4, r4, cmask)); } -__INLINE void fe52mb8_sub_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_sub_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { /* r = a-b */ U64 r0 = sub64(va[0], vb[0]); @@ -747,7 +747,7 @@ __INLINE void fe52mb8_sub_mod25519(U64 vr[], 
const U64 va[], const U64 vb[]) storeu64(&vr[4], cmov_U64(r4, t4, cmask)); } -__INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) { /* r = a-p */ U64 r0 = sub64(va[0], loadu64(VPRIME25519_LO)); @@ -788,7 +788,7 @@ __INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) considering the exponent as 2^255 - 21 = (2^5) * (2^250 - 1) + 11. */ -__INLINE void fe52mb8_inv_mod25519(U64 out[], const U64 z[]) +__MBX_INLINE void fe52mb8_inv_mod25519(U64 out[], const U64 z[]) { __ALIGN64 U64 t0[5]; __ALIGN64 U64 t1[5]; @@ -906,7 +906,7 @@ static void x25519_scalar_mul(U64 out[], U64 scalar[], U64 point[]) swap = b; fe52_sub(tmp0, x3, z3); fe52_sub(tmp1, x2, z2); - fe52_add(x2, x2, z2); + fe52_add(x2, x2, z2); fe52_add(z2, x3, z3); #ifdef USE_DUAL_MUL_SQR @@ -951,7 +951,7 @@ static void x25519_scalar_mul(U64 out[], U64 scalar[], U64 point[]) ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -__INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], const U64 a0[], const U64 b0[], const U64 a1[], const U64 b1[]) { @@ -1047,7 +1047,7 @@ __INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], storeu64(&vr1[4], r14); } -__INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = get_zero64(); @@ -1072,7 +1072,7 @@ __INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -1102,7 +1102,7 @@ __INLINE void 
fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb storeu64(&vr[4], r4); } -__INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) { U64 multiplier = set64(121666); @@ -1136,7 +1136,7 @@ __INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) storeu64(&vr[4], r4); } -__INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) +__MBX_INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) { __ALIGN64 U64 x1[5], x2[5], x3[5]; __ALIGN64 U64 z2[5], z3[5]; @@ -1180,7 +1180,7 @@ __INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) fe52_sub(tmp0, x3, z3); fe52_sub(tmp1, x2, z2); - fe52_add(x2, x2, z2); + fe52_add(x2, x2, z2); fe52_add(z2, x3, z3); ed25519_mul_dual_wonorm(z3, z2, x2,tmp0, z2,tmp1); @@ -1575,19 +1575,19 @@ __ALIGN64 static int64u muTBL52[255][NUMBER_OF_DIGITS(256,DIGIT_SIZE)] = { {0x000deda7f334d2df, 0x00051af2a57b4a6a, 0x0006dceaa87bde9c, 0x000d07ba98fc64f8, 0x00006bbe0335c20e}, }; -__ALIGN64 static const int64u U2_0[8] = +__ALIGN64 static const int64u U2_0[8] = {0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290}; -__ALIGN64 static const int64u U2_1[8] = +__ALIGN64 static const int64u U2_1[8] = {0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816}; -__ALIGN64 static const int64u U2_2[8] = +__ALIGN64 static const int64u U2_2[8] = {0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a}; -__ALIGN64 static const int64u U2_3[8] = +__ALIGN64 static const int64u U2_3[8] = {0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 
0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d}; -__ALIGN64 static const int64u U2_4[8] = +__ALIGN64 static const int64u U2_4[8] = {0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83}; diff --git a/sources/ippcp/ecnist/ifma_arith_n256.c b/sources/ippcp/ecnist/ifma_arith_n256.c index b08628b0..b17fa5a4 100644 --- a/sources/ippcp/ecnist/ifma_arith_n256.c +++ b/sources/ippcp/ecnist/ifma_arith_n256.c @@ -182,7 +182,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_n256, (const m512 a)) * note: z in in Montgomery domain * r in Montgomery domain */ -__INLINE m512 ifma_ams52_n256_ntimes(const m512 a, int n) +__IPPCP_INLINE m512 ifma_ams52_n256_ntimes(const m512 a, int n) { m512 r = a; for (; n > 0; --n) { diff --git a/sources/ippcp/ecnist/ifma_arith_n384.c b/sources/ippcp/ecnist/ifma_arith_n384.c index 9ecb3aaf..3e987e73 100644 --- a/sources/ippcp/ecnist/ifma_arith_n384.c +++ b/sources/ippcp/ecnist/ifma_arith_n384.c @@ -196,7 +196,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_n384, (const m512 a)) * note: z in in Montgomery domain * r in Montgomery domain */ -__INLINE m512 ifma_ams52_n384_ntimes(const m512 a, int n) +__IPPCP_INLINE m512 ifma_ams52_n384_ntimes(const m512 a, int n) { m512 r = a; for (; n > 0; --n) { diff --git a/sources/ippcp/ecnist/ifma_arith_n521.c b/sources/ippcp/ecnist/ifma_arith_n521.c index 37a5fa81..b16a322b 100644 --- a/sources/ippcp/ecnist/ifma_arith_n521.c +++ b/sources/ippcp/ecnist/ifma_arith_n521.c @@ -238,7 +238,7 @@ static void ifma_ams52_n521(fe521 pr[], const fe521 a) #define sqr(R, A) ifma_ams52_n521(&(R), (A)) /* r = base^(2^n) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) { fe521 r; FE521_COPY(r, a); diff --git a/sources/ippcp/ecnist/ifma_arith_p256.c 
b/sources/ippcp/ecnist/ifma_arith_p256.c index f3e4909c..b0b74fcd 100644 --- a/sources/ippcp/ecnist/ifma_arith_p256.c +++ b/sources/ippcp/ecnist/ifma_arith_p256.c @@ -171,14 +171,14 @@ IPP_OWN_DEFN(void, ifma_amm52_dual_p256, (m512 * r1, const m512 a1, const m512 b } /* R = (A*B) with norm */ -__INLINE m512 ifma_amm52_p256_norm(const m512 a, const m512 b) +__IPPCP_INLINE m512 ifma_amm52_p256_norm(const m512 a, const m512 b) { m512 r = ifma_amm52_p256(a, b); return ifma_lnorm52(r); } /* R = (A*A) with norm */ -__INLINE m512 ifma_ams52_p256_norm(const m512 a) +__IPPCP_INLINE m512 ifma_ams52_p256_norm(const m512 a) { return ifma_amm52_p256_norm(a, a); } @@ -263,7 +263,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_p256, (const m512 a)) #define sqr(R, A) (R) = ifma_ams52_p256_norm((A)) #define mul(R, A, B) (R) = ifma_amm52_p256_norm((A), (B)); -__INLINE m512 ifma_ams52_p256_ntimes(m512 a, Ipp32s n) +__IPPCP_INLINE m512 ifma_ams52_p256_ntimes(m512 a, Ipp32s n) { for (; n > 0; --n) sqr(a, a); diff --git a/sources/ippcp/ecnist/ifma_arith_p256.h b/sources/ippcp/ecnist/ifma_arith_p256.h index dedcdf16..3a9f47c7 100644 --- a/sources/ippcp/ecnist/ifma_arith_p256.h +++ b/sources/ippcp/ecnist/ifma_arith_p256.h @@ -63,7 +63,7 @@ IPP_OWN_DECL(void, ifma_amm52_dual_p256, (m512 *r1, const m512 a1, const m512 b1 * * \param[in] a value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) +__IPPCP_INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) { return ifma_amm52_p256(a, a); } @@ -78,7 +78,7 @@ __INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) * \param[out] r2 * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p256, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p256, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) { ifma_amm52_dual_p256(r1, a1, a1, r2, a2, a2); return; diff --git a/sources/ippcp/ecnist/ifma_arith_p384.c 
b/sources/ippcp/ecnist/ifma_arith_p384.c index c9fbec94..764259af 100644 --- a/sources/ippcp/ecnist/ifma_arith_p384.c +++ b/sources/ippcp/ecnist/ifma_arith_p384.c @@ -242,7 +242,7 @@ IPP_OWN_DEFN(void, ifma_amm52_dual_p384, (m512 * pr1, const m512 a1, const m512 } /* R = (A*B) with norm */ -__INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) +__IPPCP_INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) { m512 r = ifma_amm52_p384(a, b); /* normalization */ @@ -250,7 +250,7 @@ __INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) } /* R = (A*A) with norm */ -__INLINE m512 m512_sqr_norm(const m512 a) +__IPPCP_INLINE m512 m512_sqr_norm(const m512 a) { return ifma_amm52_p384_norm(a, a); } @@ -297,7 +297,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_p384, (const m512 a)) ifma_amm52_dual_p384(&(R1), (A1), (B1), &(R2), (A2), (B2)); \ ifma_lnorm52_dual(&(R1), (R1), &(R2), (R2)) -__INLINE m512 ifma_ams52_p384_ntimes(const m512 a, Ipp32s n) +__IPPCP_INLINE m512 ifma_ams52_p384_ntimes(const m512 a, Ipp32s n) { m512 r = a; for (; n > 0; --n) diff --git a/sources/ippcp/ecnist/ifma_arith_p384.h b/sources/ippcp/ecnist/ifma_arith_p384.h index 487665e5..6363dd7d 100644 --- a/sources/ippcp/ecnist/ifma_arith_p384.h +++ b/sources/ippcp/ecnist/ifma_arith_p384.h @@ -63,7 +63,7 @@ IPP_OWN_DECL(void, ifma_amm52_dual_p384, (m512 * r1, const m512 a1, const m512 b * * \param[in] a value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) +__IPPCP_INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) { return ifma_amm52_p384(a, a); } @@ -78,7 +78,7 @@ __INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) * \param[out] r2 * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p384, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p384, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) { ifma_amm52_dual_p384(r1, a1, a1, r2, a2, a2); return; diff 
--git a/sources/ippcp/ecnist/ifma_arith_p521.c b/sources/ippcp/ecnist/ifma_arith_p521.c index e284ff25..a2de5ecb 100644 --- a/sources/ippcp/ecnist/ifma_arith_p521.c +++ b/sources/ippcp/ecnist/ifma_arith_p521.c @@ -940,14 +940,14 @@ IPP_OWN_DEFN(void, ifma_frommont52_p521, (fe521 pr[], const fe521 a)) return; } -__INLINE IPP_OWN_DEFN(void, ifma_amm52_p521_norm, (fe521 pr[], const fe521 a, const fe521 b)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_amm52_p521_norm, (fe521 pr[], const fe521 a, const fe521 b)) { ifma_amm52_p521(pr, a, b); ifma_lnorm52_p521(pr, *pr); return; } -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) { ifma_ams52_p521(pr, a); ifma_lnorm52_p521(pr, *pr); @@ -961,7 +961,7 @@ __INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) ifma_lnorm52_dual_p521(&(R1), (R1), &(R2), (R2)) /* r = base^(2^n) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) { fe521 r; FE521_COPY(r, a); diff --git a/sources/ippcp/ecnist/ifma_defs.h b/sources/ippcp/ecnist/ifma_defs.h index ad36a599..73ec9c99 100644 --- a/sources/ippcp/ecnist/ifma_defs.h +++ b/sources/ippcp/ecnist/ifma_defs.h @@ -45,7 +45,7 @@ * \return 0xFF - if MSB = 1 * \return 0x00 - if MSB = 0 */ -__INLINE mask8 check_bit(const mask8 a, int bit) +__IPPCP_INLINE mask8 check_bit(const mask8 a, int bit) { return (mask8)((mask8)0 - ((a >> bit) & 1u)); } @@ -59,7 +59,7 @@ __INLINE mask8 check_bit(const mask8 a, int bit) * \return 0xFF - if input value is all zeroes * \return 0x00 - if input value is not all zeroes */ -__INLINE mask8 is_zero_i64(const m512 a) +__IPPCP_INLINE mask8 is_zero_i64(const m512 a) { const mask8 mask = cmp_i64_mask(a, setzero_i64(), _MM_CMPINT_NE); return check_bit((~mask & (mask - 1u)), 7); diff --git 
a/sources/ippcp/ecnist/ifma_defs_p521.h b/sources/ippcp/ecnist/ifma_defs_p521.h index fb5a843e..ad060e2d 100644 --- a/sources/ippcp/ecnist/ifma_defs_p521.h +++ b/sources/ippcp/ecnist/ifma_defs_p521.h @@ -61,12 +61,12 @@ static const __ALIGN64 Ipp64u P521R1_ONE52[P521R1_NUM_CHUNK][P521R1_LENFE521_52] FE521_MID(R) = m256_loadu_i64(FE521_MID(A)); \ FE521_HI(R) = m256_loadu_i64(FE521_HI(A)) -__INLINE mask8 is_msb_m256(const mask8 a) +__IPPCP_INLINE mask8 is_msb_m256(const mask8 a) { return ((mask8)0 - (a >> 7)); } -__INLINE mask8 is_zero_m256(const m256i a) +__IPPCP_INLINE mask8 is_zero_m256(const m256i a) { const mask8 mask = _mm256_cmp_epi64_mask(a, m256_setzero_i64(), _MM_CMPINT_NE); return is_msb_m256((~mask & (mask - 1))); diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p256.c b/sources/ippcp/ecnist/ifma_ecpoint_p256.c index 6d836413..5bd98811 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p256.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p256.c @@ -510,7 +510,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -518,7 +518,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P256_POINT_IFMA *r, const Ipp32s digit, const P256_POINT_IFMA *tbl) +__IPPCP_INLINE void extract_table_point(P256_POINT_IFMA *r, const Ipp32s digit, const P256_POINT_IFMA *tbl) { Ipp32s idx = digit - 1; @@ -659,7 +659,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp256_mul_point, (P256_POINT_IFMA * r, const P256_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P256_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void extract_point_affine(P256_POINT_AFFINE_IFMA *r, const P256_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git 
a/sources/ippcp/ecnist/ifma_ecpoint_p256.h b/sources/ippcp/ecnist/ifma_ecpoint_p256.h index c6acb52b..269bda6a 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p256.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p256.h @@ -138,7 +138,7 @@ IPP_OWN_DECL(void, p256r1_select_ap_w7_ifma, (BNU_CHUNK_T * pAffinePoint, const #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, @@ -166,7 +166,7 @@ __INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, P256_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p384.c b/sources/ippcp/ecnist/ifma_ecpoint_p384.c index 449adcd3..ed368f62 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p384.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p384.c @@ -546,7 +546,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -554,7 +554,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P384_POINT_IFMA *r, const Ipp32s digit, const P384_POINT_IFMA *tbl) +__IPPCP_INLINE void extract_table_point(P384_POINT_IFMA *r, const Ipp32s digit, const P384_POINT_IFMA *tbl) { Ipp32s idx = digit - 1; @@ -691,7 +691,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp384_mul_point, (P384_POINT_IFMA * r, const P384_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P384_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void 
extract_point_affine(P384_POINT_AFFINE_IFMA *r, const P384_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p384.h b/sources/ippcp/ecnist/ifma_ecpoint_p384.h index e682808b..a33a8bd6 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p384.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p384.h @@ -138,7 +138,7 @@ IPP_OWN_DECL(void, p384r1_select_ap_w4_ifma, (BNU_CHUNK_T * pAffinePoint, const #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, @@ -166,7 +166,7 @@ __INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(const IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(const IppsGFpECPoint *pR, P384_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p521.c b/sources/ippcp/ecnist/ifma_ecpoint_p521.c index 9aac2e10..51a7542e 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p521.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p521.c @@ -519,7 +519,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -527,7 +527,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P521_POINT_IFMA *r, const Ipp32s digit, const P521_POINT_IFMA tbl[]) +__IPPCP_INLINE void extract_table_point(P521_POINT_IFMA *r, const Ipp32s digit, const P521_POINT_IFMA tbl[]) { Ipp32s idx = digit - 1; @@ -664,7 +664,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp521_mul_point, (P521_POINT_IFMA * r, const P521_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY 
BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P521_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void extract_point_affine(P521_POINT_AFFINE_IFMA *r, const P521_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p521.h b/sources/ippcp/ecnist/ifma_ecpoint_p521.h index 4930e868..f4575028 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p521.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p521.h @@ -100,7 +100,7 @@ IPP_OWN_DECL(void, ifma_ec_nistp521_add_point_affine, (P521_POINT_IFMA * r, cons #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod_p521 *method, @@ -128,7 +128,7 @@ __INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, p_to_mont(&(pR->z), pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, P521_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod_p521 *method, diff --git a/sources/ippcp/exports.linux.lib-export b/sources/ippcp/exports.linux.lib-export index 98a344d5..6c1af308 100644 --- a/sources/ippcp/exports.linux.lib-export +++ b/sources/ippcp/exports.linux.lib-export @@ -562,6 +562,12 @@ EXTERN (ippsXMSSSetSignatureState) EXTERN (ippsXMSSSignatureStateGetSize) EXTERN (ippsXMSSPublicKeyStateGetSize) EXTERN (ippsXMSSBufferGetSize) +EXTERN (ippsLMSBufferGetSize) +EXTERN (ippsLMSSignatureStateGetSize) +EXTERN (ippsLMSPublicKeyStateGetSize) +EXTERN (ippsLMSSetPublicKeyState) +EXTERN (ippsLMSSetSignatureState) +EXTERN (ippsLMSVerify) VERSION { { @@ -1130,6 +1136,12 @@ VERSION { ippsXMSSSignatureStateGetSize; ippsXMSSPublicKeyStateGetSize; ippsXMSSBufferGetSize; + ippsLMSBufferGetSize; + ippsLMSSignatureStateGetSize; + ippsLMSPublicKeyStateGetSize; + ippsLMSSetPublicKeyState; + ippsLMSSetSignatureState; + ippsLMSVerify; local: *; }; } diff --git 
a/sources/ippcp/exports.linux.selftests-export b/sources/ippcp/exports.linux.selftests-export index 8e8b45ca..8311fc47 100644 --- a/sources/ippcp/exports.linux.selftests-export +++ b/sources/ippcp/exports.linux.selftests-export @@ -562,6 +562,12 @@ EXTERN (ippsXMSSSetSignatureState) EXTERN (ippsXMSSSignatureStateGetSize) EXTERN (ippsXMSSPublicKeyStateGetSize) EXTERN (ippsXMSSBufferGetSize) +EXTERN (ippsLMSBufferGetSize) +EXTERN (ippsLMSSignatureStateGetSize) +EXTERN (ippsLMSPublicKeyStateGetSize) +EXTERN (ippsLMSSetPublicKeyState) +EXTERN (ippsLMSSetSignatureState) +EXTERN (ippsLMSVerify) EXTERN (ippcp_is_fips_approved_func) EXTERN (fips_selftest_ippsAESEncryptDecrypt_get_size) @@ -1183,6 +1189,12 @@ VERSION { ippsXMSSSignatureStateGetSize; ippsXMSSPublicKeyStateGetSize; ippsXMSSBufferGetSize; + ippsLMSBufferGetSize; + ippsLMSSignatureStateGetSize; + ippsLMSPublicKeyStateGetSize; + ippsLMSSetPublicKeyState; + ippsLMSSetSignatureState; + ippsLMSVerify; ippcp_is_fips_approved_func; fips_selftest_ippsAESEncryptDecrypt_get_size; diff --git a/sources/ippcp/exports.macosx.lib-export b/sources/ippcp/exports.macosx.lib-export index 931f516f..ddc7edd1 100644 --- a/sources/ippcp/exports.macosx.lib-export +++ b/sources/ippcp/exports.macosx.lib-export @@ -562,3 +562,9 @@ _ippsXMSSSetSignatureState _ippsXMSSSignatureStateGetSize _ippsXMSSPublicKeyStateGetSize _ippsXMSSBufferGetSize +_ippsLMSBufferGetSize +_ippsLMSSignatureStateGetSize +_ippsLMSPublicKeyStateGetSize +_ippsLMSSetPublicKeyState +_ippsLMSSetSignatureState +_ippsLMSVerify diff --git a/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c b/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c index 0c676836..593a630f 100644 --- a/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c +++ b/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c @@ -50,12 +50,6 @@ static const Ipp8u r[] = { 0xac,0xc2,0xc8,0x79,0x6f,0x5e,0xbb,0xca,0x7a static const Ipp8u s[] = { 
0x03,0x89,0x05,0xcc,0x2a,0xda,0xcd,0x3c,0x5a,0x17,0x6f,0xe9,0x18,0xb2,0x97,0xef, 0x1c,0x37,0xf7,0x2b,0x26,0x76,0x6c,0x78,0xb2,0xa6,0x05,0xca,0x19,0x78,0xf7,0x8b }; -/* pub key coordinates */ -static const Ipp8u qx[] = { 0x83,0xbf,0x71,0xc2,0x46,0xff,0x59,0x3c,0x2f,0xb1,0xbf,0x4b,0xe9,0x5d,0x56,0xd3, - 0xcc,0x8f,0xdb,0x48,0xa2,0xbf,0x33,0xf0,0xf4,0xc7,0x5f,0x07,0x1c,0xe9,0xcb,0x1c}; -static const Ipp8u qy[] = { 0xa9,0x4c,0x9a,0xa8,0x5c,0xcd,0x7c,0xdc,0x78,0x4e,0x40,0xb7,0x93,0xca,0xb7,0x6d, - 0xe0,0x13,0x61,0x0e,0x2c,0xdb,0x1f,0x1a,0xa2,0xf9,0x11,0x88,0xc6,0x14,0x40,0xce }; - static const unsigned int primeBitSize = 256; static const unsigned int ordWordSize = 8; @@ -169,12 +163,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECSignDSA, (Ipp8u *pGFpBuff, Ipp8u int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else @@ -325,12 +325,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECVerifyDSA, (Ipp8u *pGFpBuff, Ipp int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, 
pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else @@ -472,12 +478,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECPrivateKey, (Ipp8u *pGFpBuff, Ip int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else diff --git a/sources/ippcp/gsmod_montinv.c b/sources/ippcp/gsmod_montinv.c index 3c989807..19fe91bf 100644 --- a/sources/ippcp/gsmod_montinv.c +++ b/sources/ippcp/gsmod_montinv.c @@ -14,11 +14,11 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. Modular Arithmetic Engine. 
General Functionality -// +// // Contents: // gs_mont_inv() // @@ -31,7 +31,7 @@ #include "gsmodstuff.h" #include "pcpmask_ct.h" -__INLINE BNU_CHUNK_T* cpPow2_ct(int bit, BNU_CHUNK_T* dst, int len) +__IPPCP_INLINE BNU_CHUNK_T* cpPow2_ct(int bit, BNU_CHUNK_T* dst, int len) { int slot = bit/BNU_CHUNK_BITS; BNU_CHUNK_T value = (BNU_CHUNK_T)1 << (bit%BNU_CHUNK_BITS); diff --git a/sources/ippcp/gsmodmethod.h b/sources/ippcp/gsmodmethod.h index d9462215..e84b0526 100644 --- a/sources/ippcp/gsmodmethod.h +++ b/sources/ippcp/gsmodmethod.h @@ -54,17 +54,17 @@ typedef struct _gsModMethod { /* These functions should not be used, because they have non-constant execution time, see their safe analogues in pcpmask_ct.h */ #if 0 -__INLINE BNU_CHUNK_T cpIsZero(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsZero(BNU_CHUNK_T x) { return x==0; } -__INLINE BNU_CHUNK_T cpIsNonZero(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsNonZero(BNU_CHUNK_T x) { return x!=0; } -__INLINE BNU_CHUNK_T cpIsOdd(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsOdd(BNU_CHUNK_T x) { return x&1; } -__INLINE BNU_CHUNK_T cpIsEven(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsEven(BNU_CHUNK_T x) { return 1-cpIsOdd(x); } /* dst[] = (flag)? 
src[] : dst[] */ -__INLINE void cpMaskMove_gs(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T moveFlag) +__IPPCP_INLINE void cpMaskMove_gs(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T moveFlag) { BNU_CHUNK_T srcMask = 0-cpIsNonZero(moveFlag); BNU_CHUNK_T dstMask = ~srcMask; diff --git a/sources/ippcp/gsmodstuff.h b/sources/ippcp/gsmodstuff.h index 25de7105..bd17d129 100644 --- a/sources/ippcp/gsmodstuff.h +++ b/sources/ippcp/gsmodstuff.h @@ -101,7 +101,7 @@ typedef struct _gsModEngine // poolReq Required pool *F*/ -__INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) +__IPPCP_INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) { BNU_CHUNK_T* pPool = MOD_BUFFER(pME, pME->poolLenUsed); @@ -126,7 +126,7 @@ __INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) // poolReq Required pool *F*/ -__INLINE void gsModPoolFree(gsModEngine* pME, int poolReq) +__IPPCP_INLINE void gsModPoolFree(gsModEngine* pME, int poolReq) { if(pME->poolLenUsed < poolReq) poolReq = pME->poolLenUsed; diff --git a/sources/ippcp/ifma_exp52x20.c b/sources/ippcp/ifma_exp52x20.c index 9ad9f37c..2f186235 100644 --- a/sources/ippcp/ifma_exp52x20.c +++ b/sources/ippcp/ifma_exp52x20.c @@ -34,7 +34,7 @@ #define AMM ifma256_amm52x20 #define AMS ifma256_ams52x20 -__INLINE void extract_multiplier(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][LEN52], int red_table_idx) { diff --git a/sources/ippcp/ifma_exp52x20_dual.c b/sources/ippcp/ifma_exp52x20_dual.c index f6e5604a..fc69cd54 100644 --- a/sources/ippcp/ifma_exp52x20_dual.c +++ b/sources/ippcp/ifma_exp52x20_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x20_dual #define DAMS ifma256_ams52x20_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git 
a/sources/ippcp/ifma_exp52x30_dual.c b/sources/ippcp/ifma_exp52x30_dual.c index 73b42356..ed432581 100644 --- a/sources/ippcp/ifma_exp52x30_dual.c +++ b/sources/ippcp/ifma_exp52x30_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x30_dual #define DAMS ifma256_ams52x30_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git a/sources/ippcp/ifma_exp52x40_dual.c b/sources/ippcp/ifma_exp52x40_dual.c index a9da4bc0..2bf17ed9 100644 --- a/sources/ippcp/ifma_exp52x40_dual.c +++ b/sources/ippcp/ifma_exp52x40_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x40_dual #define DAMS ifma256_ams52x40_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git a/sources/ippcp/ifma_math_avx512vl.h b/sources/ippcp/ifma_math_avx512vl.h index c5c26426..1873671a 100644 --- a/sources/ippcp/ifma_math_avx512vl.h +++ b/sources/ippcp/ifma_math_avx512vl.h @@ -45,11 +45,11 @@ #define SIMD_BYTES (SIMD_LEN/8) #define SIMD_QWORDS (SIMD_LEN/64) - __INLINE U64 loadu64(const void *p) { + __IPPCP_INLINE U64 loadu64(const void *p) { return _mm256_loadu_si256((U64*)p); } - __INLINE void storeu64(const void *p, U64 v) { + __IPPCP_INLINE void storeu64(const void *p, U64 v) { _mm256_storeu_si256((U64*)p, v); } @@ -80,7 +80,7 @@ __asm__ ( "vpmadd52huq " #o "(%2), %1, %0" : "+x" (r): "x" (b), "r" (c) ); \ } #else - /* Use IFMA instrinsics for all other compilers */ + /* Use IFMA intrinsics for all other compilers */ static U64 fma52lo(U64 a, U64 b, U64 c) { return _mm256_madd52lo_epu64(a, b, c); @@ -102,7 +102,7 @@ } #endif - __INLINE U64 mul52lo(U64 b, U64 c) + __IPPCP_INLINE U64 mul52lo(U64 b, U64 c) { return fma52lo(_mm256_setzero_si256(), b, c); } @@ -110,44 +110,44 @@ #define fma52lo_mem(r, a, 
b, c, o) _mm_madd52lo_epu64_(r, a, b, c, o) #define fma52hi_mem(r, a, b, c, o) _mm_madd52hi_epu64_(r, a, b, c, o) - __INLINE U64 add64(U64 a, U64 b) + __IPPCP_INLINE U64 add64(U64 a, U64 b) { return _mm256_add_epi64(a, b); } - __INLINE U64 sub64(U64 a, U64 b) + __IPPCP_INLINE U64 sub64(U64 a, U64 b) { return _mm256_sub_epi64(a, b); } - __INLINE U64 get_zero64() + __IPPCP_INLINE U64 get_zero64() { return _mm256_setzero_si256(); } - __INLINE void set_zero64(U64 *a) + __IPPCP_INLINE void set_zero64(U64 *a) { *a = _mm256_xor_si256(*a, *a); } - __INLINE U64 set1(unsigned long long a) + __IPPCP_INLINE U64 set1(unsigned long long a) { return _mm256_set1_epi64x((long long)a); } - __INLINE U64 srli64(U64 a, int s) + __IPPCP_INLINE U64 srli64(U64 a, int s) { return _mm256_srli_epi64(a, s); } #define slli64 _mm256_slli_epi64 - __INLINE U64 and64_const(U64 a, unsigned long long mask) + __IPPCP_INLINE U64 and64_const(U64 a, unsigned long long mask) { return _mm256_and_si256(a, _mm256_set1_epi64x((long long)mask)); } - __INLINE U64 and64(U64 a, U64 mask) + __IPPCP_INLINE U64 and64(U64 a, U64 mask) { return _mm256_and_si256(a, mask); } diff --git a/sources/ippcp/ippcp.def b/sources/ippcp/ippcp.def index 9f66d9f5..6fbcc669 100644 --- a/sources/ippcp/ippcp.def +++ b/sources/ippcp/ippcp.def @@ -564,3 +564,9 @@ ippsXMSSSetSignatureState ippsXMSSSignatureStateGetSize ippsXMSSPublicKeyStateGetSize ippsXMSSBufferGetSize +ippsLMSBufferGetSize +ippsLMSSignatureStateGetSize +ippsLMSPublicKeyStateGetSize +ippsLMSSetPublicKeyState +ippsLMSSetSignatureState +ippsLMSVerify diff --git a/sources/ippcp/ippcp_fips_selftests.def b/sources/ippcp/ippcp_fips_selftests.def index 9d321e40..cdc59d2d 100644 --- a/sources/ippcp/ippcp_fips_selftests.def +++ b/sources/ippcp/ippcp_fips_selftests.def @@ -564,6 +564,12 @@ ippsXMSSSetSignatureState ippsXMSSSignatureStateGetSize ippsXMSSPublicKeyStateGetSize ippsXMSSBufferGetSize +ippsLMSBufferGetSize +ippsLMSSignatureStateGetSize +ippsLMSPublicKeyStateGetSize 
+ippsLMSSetPublicKeyState +ippsLMSSetSignatureState +ippsLMSVerify ippcp_is_fips_approved_func diff --git a/sources/ippcp/lms/lms_setters_getters.c b/sources/ippcp/lms/lms_setters_getters.c new file mode 100644 index 00000000..204bd912 --- /dev/null +++ b/sources/ippcp/lms/lms_setters_getters.c @@ -0,0 +1,301 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#include "owndefs.h" +#include "lms_internal/lms.h" + +/*F* +// Name: ippsLMSBufferGetSize +// +// Purpose: Get the LMS temporary buffer size (bytes). 
+// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsLengthErr maxMessageLength < 1 +// maxMessageLength > (Ipp32s)(IPP_MAX_32S) - +// - (byteSizeI + 4(q byteSize) + 2(D_MESG byteSize) + n(C byteSize)) +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the work buffer's byte size +// maxMessageLength maximum length of the processing message +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ + +IPPFUN(IppStatus, ippsLMSBufferGetSize, (Ipp32s* pSize, Ipp32s maxMessageLength, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + /* Input parameters check */ + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Check message length */ + IPP_BADARG_RET(maxMessageLength < 1, ippStsLengthErr); + // this restriction is needed to avoid overflow of Ipp32s + // maxMessageLength must be less than IPP_MAX_32S - (CP_PK_I_BYTESIZE + q + D_MESG + C ) + IPP_BADARG_RET(maxMessageLength > (Ipp32s)((IPP_MAX_32S) - (CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n)), + ippStsLengthErr); + + /* Calculate the maximum Set LMOTS and LMS parameters */ + // pubKey->I || q || D_MESG || C || pMsg + Ipp32u lenBufQ = 
CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n + (Ipp32u)maxMessageLength; + // pubKey->I || q || i || j || Y[i] + Ipp32u lenBufTmp = CP_PK_I_BYTESIZE + 4 + 2 + 1 + lmotsParams.n; + // pubKey->I || node_num || D_LEAF || Kc + Ipp32u lenBufTc = CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n; + // pubKey->I || node_num/2 || D_INTR || path[i] || tmp + Ipp32u lenBufIntr = CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n + lmotsParams.n; + + *pSize = (Ipp32s)IPP_MAX(IPP_MAX(IPP_MAX(lenBufQ, lenBufTmp), lenBufTc), lenBufIntr); + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSignatureStateGetSize +// +// Purpose: Get the LMS signature state size (bytes). +// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the size +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ + +IPPFUN(IppStatus, ippsLMSSignatureStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + *pSize = (Ipp32s)sizeof(IppsLMSSignatureState) + + (Ipp32s)(lmotsParams.n * lmsParams.h) + /*_pAuthPath*/ + (Ipp32s)lmotsParams.n + /* C */ + 
(Ipp32s)(lmotsParams.n * lmotsParams.p); /* Y */ + + return ippcpSts; +} + +/*F* +// Name: ippsLMSPublicKeyStateGetSize +// +// Purpose: Provides the LMS public key state size (bytes). +// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the size +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ +IPPFUN(IppStatus, ippsLMSPublicKeyStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set LMS parameters */ + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + *pSize = (Ipp32s)sizeof(IppsLMSPublicKeyState) + + (Ipp32s)lmsParams.m; /* T1 */ + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSetPublicKeyState +// +// Purpose: Set LMS public key. 
+// +// Returns: Reason: +// ippStsNullPtrErr pI == NULL +// pK == NULL +// pState == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// pI pointer to the LMS private key identifier +// pK pointer to the LMS public key +// pState pointer to the LMS public key state +// +*F*/ +IPPFUN(IppStatus, ippsLMSSetPublicKeyState, (const IppsLMSAlgoType lmsType, + const Ipp8u* pI, const Ipp8u* pK, + IppsLMSPublicKeyState* pState)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR3_RET(pI, pK, pState); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set context id to prevent its copying */ + CP_LMS_SET_CTX_ID(pState); + + /* Set LMS parameters */ + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Fill in the structure */ + pState->lmsOIDAlgo = lmsType.lmsOIDAlgo; + pState->lmotsOIDAlgo = lmsType.lmotsOIDAlgo; + CopyBlock(pI, pState->I, CP_PK_I_BYTESIZE); + // Set pointer to T1 right to the end of the context + pState->T1 = (Ipp8u*)pState+sizeof(IppsLMSPublicKeyState); + CopyBlock(pK, pState->T1, (cpSize)lmsParams.m); + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSetSignatureState +// +// Purpose: Set LMS signature. 
+// +// Returns: Reason: +// ippStsNullPtrErr pC == NULL +// pY == NULL +// pAuthPath == NULL +// pState == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// q is incorrect +// ippStsNoErr no errors +// +// Parameters: +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// q index of LMS leaf +// pC pointer to the C LM-OTS value +// pY pointer to the y LM-OTS value +// pAuthPath pointer to the LMS authorization path +// pState pointer to the LMS signature state +// +*F*/ + +IPPFUN(IppStatus, ippsLMSSetSignatureState, (const IppsLMSAlgoType lmsType, + Ipp32u q, + const Ipp8u* pC, + const Ipp8u* pY, + const Ipp8u* pAuthPath, + IppsLMSSignatureState* pState)) +{ + IPP_BAD_PTR4_RET(pC, pY, pAuthPath, pState); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + IppStatus ippcpSts = ippStsNoErr; + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Set context id to prevent its copying */ + CP_LMS_SET_CTX_ID(pState); + + /* Check q value before set */ + Ipp32u qLimit = 1 << lmsParams.h; + IPP_BADARG_RET(q >= qLimit, ippStsBadArgErr); + + pState->_q = q; + pState->_lmsOIDAlgo = lmsType.lmsOIDAlgo; + + _cpLMOTSSignatureState* locLMOTSSig = &(pState->_lmotsSig); + locLMOTSSig->_lmotsOIDAlgo = lmsType.lmotsOIDAlgo; + + // Copy auth path data + Ipp32s authPathSize = 
(Ipp32s)(lmsParams.h * lmotsParams.n); + pState->_pAuthPath = (Ipp8u*)pState+sizeof(IppsLMSSignatureState); + CopyBlock(pAuthPath, pState->_pAuthPath, authPathSize); + + // Copy C data + Ipp32s cSize = (Ipp32s)lmotsParams.n; + locLMOTSSig->pC = (Ipp8u*)pState->_pAuthPath+authPathSize; + CopyBlock(pC, locLMOTSSig->pC, cSize); + + // Copy Y data + Ipp32s ySize = (Ipp32s)(lmotsParams.n * lmotsParams.p); + locLMOTSSig->pY = (Ipp8u*)pState->_pAuthPath+authPathSize+cSize; + CopyBlock(pY, locLMOTSSig->pY, ySize); + + return ippcpSts; +} diff --git a/sources/ippcp/lms/lms_verify.c b/sources/ippcp/lms/lms_verify.c new file mode 100644 index 00000000..4c78a354 --- /dev/null +++ b/sources/ippcp/lms/lms_verify.c @@ -0,0 +1,223 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ +#include "lms_internal/lms.h" + +/*F* +// Name: ippsLMSVerify +// +// Purpose: LMS signature verification. 
+// +// Returns: Reason: +// ippStsNullPtrErr pMsg == NULL +// pSign == NULL +// pIsSignValid == NULL +// pKey == NULL +// pBuffer == NULL +// ippStsBadArgErr wrong LMS or LMOTS parameters +// inside pSign and pKey +// OR q is incorrect +// ippStsContextMatchErr pSign or pKey contexts are invalid +// ippStsLengthErr msgLen < 1 +// ippStsNoErr no errors +// +// Parameters: +// pMsg pointer to the message data buffer +// msgLen message buffer length, bytes +// pSign pointer to the LMS signature state +// pIsSignValid 1 if signature is valid, 0 - vice versa +// pKey pointer to the LMS public key state +// pBuffer pointer to the temporary memory +// +*F*/ + +IPPFUN(IppStatus, ippsLMSVerify, (const Ipp8u* pMsg, const Ipp32s msgLen, + const IppsLMSSignatureState* pSign, + int* pIsSignValid, + const IppsLMSPublicKeyState* pKey, + Ipp8u* pBuffer)) +{ + IppStatus ippcpSts = ippStsNoErr; + + /* Check if any of input pointers are NULL */ + IPP_BAD_PTR4_RET(pMsg, pSign, pIsSignValid, pKey) + /* Check if temporary buffer is NULL */ + IPP_BAD_PTR1_RET(pBuffer) + /* Check msg length */ + IPP_BADARG_RET(msgLen < 1, ippStsLengthErr) + IPP_BADARG_RET( !CP_LMS_VALID_CTX_ID(pSign), ippStsContextMatchErr ); + IPP_BADARG_RET( !CP_LMS_VALID_CTX_ID(pKey), ippStsContextMatchErr ); + *pIsSignValid = 0; + + /* Parse public key(Pk) */ + /* --------------------------------------------- */ + IppsLMSAlgo lmsTypePk = pKey->lmsOIDAlgo; + IppsLMOTSAlgo lmotsTypePk = pKey->lmotsOIDAlgo; + + // Set LMOTS and LMS parameters + cpLMOTSParams lmotsParams; + cpLMSParams lmsParams; + ippcpSts = setLMOTSParams(lmotsTypePk, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + ippcpSts = setLMSParams(lmsTypePk, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + Ipp32u nParam = lmotsParams.n; + Ipp32u wParam = lmotsParams.w; + Ipp32u pParam = lmotsParams.p; + Ipp32u hParam = lmsParams.h; + Ipp32u mParam = lmsParams.m; + + /* Parse signature */ + /* 
---------------------------------------------------- */ + Ipp32u q = pSign->_q; + _cpLMOTSSignatureState lmotsSig = pSign->_lmotsSig; + IppsLMOTSAlgo lmotsTypeSig = lmotsSig._lmotsOIDAlgo; + IppsLMSAlgo lmsTypeSig = pSign->_lmsOIDAlgo; + Ipp8u* pAuthPath = pSign->_pAuthPath; + + // Check the validity of the parsed signature parameters + Ipp32u qLimit = 1 << hParam; + if((lmsTypePk != lmsTypeSig) || (lmotsTypePk != lmotsTypeSig) || (q >= qLimit)) + { + return ippStsBadArgErr; + } + + /* Compute LMS pub key candidate (Algorithms 6a and 4b) */ + /* ---------------------------------------------------- */ + Ipp8u* tmpQBuf = pBuffer; + Ipp32u total_size = 0; + // Buffer's invariant for alg correctness - first 16 bytes is always pubKey->I + CopyBlock(pKey->I, tmpQBuf, CP_PK_I_BYTESIZE); total_size+=CP_PK_I_BYTESIZE; + toByte(tmpQBuf+total_size, /*q byteLen*/ 4, q); total_size += /*q byteLen*/ 4; + toByte(tmpQBuf+total_size, /*D_MESG byteLen*/ 2, D_MESG); total_size += /*D_MESG byteLen*/ 2; + CopyBlock(lmotsSig.pC, tmpQBuf+total_size, (cpSize)nParam); total_size += nParam; + CopyBlock(pMsg, tmpQBuf+total_size, msgLen); total_size += (Ipp32u)msgLen; + + // Q = H(I || u32str(q) || u16str(D_MESG) || C || message) + Ipp8u Q_CksmQ[CP_LMS_MAX_HASH_BYTESIZE+CP_CKSM_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(tmpQBuf, (int)total_size, Q_CksmQ, lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Calculate checksum Cksm(Q) and append it to Q */ + Ipp32u cksmQ = cpCksm(Q_CksmQ, lmotsParams); + toByte(Q_CksmQ+nParam, /*cksmQ byteLen*/2, cksmQ); + + Ipp8u z[CP_SIG_MAX_Y_WORDSIZE+1][CP_LMS_MAX_HASH_BYTESIZE]; + Ipp8u* pZ = z[0]; + + for(Ipp32u i = 0; i < pParam; i++) { + // a = coef(Q || Cksm(Q), i, w) + Ipp32u a = cpCoef(Q_CksmQ, i, wParam); + //tmp = y[i] + Ipp8u tmp[CP_LMS_MAX_HASH_BYTESIZE]; + CopyBlock(lmotsSig.pY + i*nParam, tmp, (cpSize)nParam); + + // I || u32str(q) + Ipp8u* tmpBuff = pBuffer; + // I || u32str(q) || u16str(i) + 
toByte(tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4,/*i byteLen*/2,i); + for(Ipp32u j = a; j < (Ipp32u)((1 << wParam) - 1); j++) { + // I || u32str(q) || u16str(i) || u8str(j) + toByte(tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2,/*j byteLen*/1,j); + // I || u32str(q) || u16str(i) || u8str(j) || tmp + CopyBlock(tmp, tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2+/*j byteLen*/1, (cpSize)nParam); + // tmp = H(I || u32str(q) || u16str(i) || u8str(j) || tmp) + ippcpSts = ippsHashMessage_rmf(tmpBuff, + (int)(CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2+/*j byteLen*/1+nParam), + tmp, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + } + CopyBlock(tmp, pZ+(i+1)*nParam, (cpSize)nParam); + } + // I u32str(q) u16str(D_PBLC) + Ipp32s zStartOffset = (Ipp32s)(nParam - (CP_PK_I_BYTESIZE + 4 + 2 )); + // I u16str(D_PBLC) + CopyBlock(tmpQBuf, pZ + zStartOffset, CP_PK_I_BYTESIZE + 4 ); + // Conduct operation u16str(D_PBLC) + toByte(pZ + nParam - /*D_PBLC byteLen*/2, /*D_PBLC byteLen*/2, D_PBLC); + // tmp = Kc = H(I || u32str(q) || u16str(D_PBLC) || z[0] || z[1] || ... 
|| z[p-1]) + Ipp8u Kc[CP_LMS_MAX_HASH_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(pZ+zStartOffset, + (int)(pParam*nParam+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*D_PBLC byteLen*/2), + Kc, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Compute the candidate LMS root value Tc */ + /* --------------------------------------------- */ + Ipp32u node_num = (1 << hParam) + q; + Ipp8u* tmpBuffKc = pBuffer; + // I || u32str(node_num) + toByte(tmpBuffKc+CP_PK_I_BYTESIZE, /*node_num byteLen*/4, node_num); + // I || u32str(node_num) || u16str(D_LEAF) + toByte(tmpBuffKc+CP_PK_I_BYTESIZE+/*node_num byteLen*/4, /*D_LEAF byteLen*/2, D_LEAF); + // I || u32str(node_num) || u16str(D_LEAF) || Kc + CopyBlock(Kc, tmpBuffKc+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_LEAF byteLen*/2, (cpSize)mParam); + Ipp8u tmp[CP_LMS_MAX_HASH_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(tmpBuffKc, + (int)(CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_LEAF byteLen*/2+mParam), + tmp, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + Ipp32u i = 0; + Ipp8u* locTmp = pBuffer; + // I || u32str(node_num/2) || u16str(D_INTR) + toByte(locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4, /*D_INTR byteLen*/2, D_INTR); + while (node_num > 1) { + // I || u32str(node_num/2) + toByte(locTmp+CP_PK_I_BYTESIZE, /*node_num byteLen*/4, node_num/2); + + if((node_num & 1) == 1) { + // I || u32str(node_num/2) || u16str(D_INTR) || path[i] + CopyBlock(pAuthPath+i*mParam, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2, + (cpSize)mParam); + // I || u32str(node_num/2) || u16str(D_INTR) || path[i] || tmp + CopyBlock(tmp, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+mParam, + (cpSize)mParam); + } + else { + // I || u32str(node_num/2) || u16str(D_INTR) || tmp + CopyBlock(tmp, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2, + (cpSize)mParam); + // I || u32str(node_num/2) || u16str(D_INTR) || tmp || path[i] + 
CopyBlock(pAuthPath+i*mParam, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+mParam, + (cpSize)mParam); + } + + ippcpSts = ippsHashMessage_rmf(locTmp, + (int)(CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+2*mParam), + tmp, + lmotsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + node_num = node_num >> 1; + i++; + } + + /* Verify with given public key */ + /* --------------------------------------------- */ + BNU_CHUNK_T is_equal = cpIsEquBlock_ct(pKey->T1, tmp, (int)mParam); + if(is_equal) { + *pIsSignValid = 1; + } + + return ippcpSts; +} diff --git a/sources/ippcp/owncp.h b/sources/ippcp/owncp.h index 962af354..ca5fb8fb 100644 --- a/sources/ippcp/owncp.h +++ b/sources/ippcp/owncp.h @@ -111,7 +111,7 @@ typedef int cpSize; #define LSR32(x,nBits) ((x)>>(nBits)) #define LSL32(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of WORD */ +/* Rotate (right and left) of WORD */ #if defined(_MSC_VER) && !defined( __ICL ) # include # define ROR32(x, nBits) _lrotr((x),(nBits)) @@ -125,7 +125,7 @@ typedef int cpSize; #define LSR64(x,nBits) ((x)>>(nBits)) #define LSL64(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of DWORD */ +/* Rotate (right and left) of DWORD */ #define ROR64(x, nBits) (LSR64((x),(nBits)) | LSL64((x),64-(nBits))) #define ROL64(x, nBits) ROR64((x),(64-(nBits))) @@ -167,7 +167,7 @@ typedef int cpSize; /* test if library's feature is ON */ int cpGetFeature( Ipp64u Feature ); /* test CPU crypto features */ -__INLINE Ipp32u IsFeatureEnabled(Ipp64u niMmask) +__IPPCP_INLINE Ipp32u IsFeatureEnabled(Ipp64u niMmask) { return (Ipp32u)cpGetFeature(niMmask); } @@ -194,7 +194,7 @@ _mm_cvtsi64_si128(__int64 a) } #endif -#if !defined( __x86_64__ ) && defined(__GNUC__) +#if !defined( __x86_64__ ) && defined(__GNUC__) && (!defined(__clang__) || (__clang_major__ < 16)) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si128 (long long __A) { diff --git 
a/sources/ippcp/pcpaes_avx2_vaes.h b/sources/ippcp/pcpaes_avx2_vaes.h index 15f1866a..42701c6c 100644 --- a/sources/ippcp/pcpaes_avx2_vaes.h +++ b/sources/ippcp/pcpaes_avx2_vaes.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // AES GCM AVX2 // Internal Functions Implementations -// +// */ #ifndef __AES_GCM_AVX2_H_ @@ -33,11 +33,49 @@ #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) +#ifdef __GNUC__ +#define ASM(a) __asm__(a); +#else +#define ASM(a) +#endif + +/* +// Zeroes the memory by 32 bit parts, +// because "epi32" is the minimal available granularity for avx2 store instructions. +// input: +// Ipp32u* out - pointer to the memory that needs to be zeroize +// int len - length of the "out" array, in 32-bit chunks +*/ +static __NOINLINE +void zeroize_256(Ipp32u* out, int len) +{ +#if defined(__GNUC__) + // Avoid dead code elimination for GNU compilers + ASM(""); +#endif + __m256i T = _mm256_setzero_si256(); + int i; + int tmp[8]; + int rest = len % 8; + if (rest == 0) + for(i=0; i<8; i++) + tmp[i] = (int)0xFFFFFFFF; + else { + for(i=0; i holds the result of the carry-less multiplication of GH by HK - *GH = _mm_xor_si128(*GH, tmpX2); +__IPPCP_INLINE void reduction(__m128i *hash0, __m128i *hash1) { + __m128i T1, T2, T3; //first phase of the reduction - tmpX0 = *GH; //copy GH into tmpX0, tmpX2, tmpX3 - tmpX2 = *GH; - tmpX3 = *GH; - tmpX0 = _mm_slli_epi64 (tmpX0, 63); //packed left shifting << 63 - tmpX2 = _mm_slli_epi64 (tmpX2, 62); //packed left shifting shift << 62 - tmpX3 = _mm_slli_epi64 (tmpX3, 57); //packed left shifting shift << 57 - tmpX0 = _mm_xor_si128(tmpX0, tmpX2); //xor the shifted versions - tmpX0 = _mm_xor_si128(tmpX0, tmpX3); - tmpX2 = tmpX0; - tmpX2 = _mm_slli_si128 (tmpX2, 8); //shift-L tmpX2 2 DWs - tmpX0 = _mm_srli_si128 (tmpX0, 8); //shift-R xmm2 2 DWs - *GH = _mm_xor_si128(*GH, tmpX2); //first phase of the 
reduction complete - tmpX1 = _mm_xor_si128(tmpX1, tmpX0); //save the lost MS 1-2-7 bits from first phase + T1 = *hash1; //copy GH into T1, T2, T3 + T2 = *hash1; + T3 = *hash1; + T1 = _mm_slli_epi64 (T1, 63); //packed left shifting << 63 + T2 = _mm_slli_epi64 (T2, 62); //packed left shifting << 62 + T3 = _mm_slli_epi64 (T3, 57); //packed left shifting << 57 + T1 = _mm_xor_si128(T1, T2); //xor the shifted versions + T1 = _mm_xor_si128(T1, T3); + T2 = T1; + T2 = _mm_slli_si128 (T2, 8); //shift-L T2 2 DWs + T1 = _mm_srli_si128 (T1, 8); //shift-R T1 2 DWs + *hash1 = _mm_xor_si128(*hash1, T2); //first phase of the reduction complete + *hash0 = _mm_xor_si128(*hash0, T1); //save the lost MS 1-2-7 bits from first phase //second phase of the reduction - tmpX2 = *GH; - tmpX2 = _mm_srli_epi64(tmpX2, 5); //packed right shifting >> 5 - tmpX2 = _mm_xor_si128(tmpX2, *GH); //xor shifted versions - tmpX2 = _mm_srli_epi64(tmpX2, 1); //packed right shifting >> 1 - tmpX2 = _mm_xor_si128(tmpX2, *GH); //xor shifted versions - tmpX2 = _mm_srli_epi64(tmpX2, 1); //packed right shifting >> 1 - *GH = _mm_xor_si128(*GH, tmpX2); //second phase of the reduction complete - *GH = _mm_xor_si128(*GH, tmpX1); //the result is in GH + T2 = *hash1; + T2 = _mm_srli_epi64(T2, 5); //packed right shifting >> 5 + T2 = _mm_xor_si128(T2, *hash1); //xor shifted versions + T2 = _mm_srli_epi64(T2, 1); //packed right shifting >> 1 + T2 = _mm_xor_si128(T2, *hash1); //xor shifted versions + T2 = _mm_srli_epi64(T2, 1); //packed right shifting >> 1 + *hash1 = _mm_xor_si128(*hash1, T2); //second phase of the reduction complete } /* -// avx2_clmul_gcm performs clmul with 256-bit registers; is used in the hash calculation step +// avx2_clmul_gcm16 performs the hash calculation with 256-bit registers for 16 blocks +// GH order - 0, 1 | 2, 3 | 4, 5 | 6, 7 | 8, 9 | 10, 11 | 12, 13 | 14, 15 +// HK order - 1, 0 | 3, 2 | 5, 4 | 7, 6 | 9, 8 | 11, 10 | 13, 12 | 15, 14 // input: -// const __m128i *HK - contains hashed keys -// 
const __m256i *HKeyKaratsuba - contains temporary data for Karatsuba method -// const __m256i *mask_lo - contains mask for taking lower bits -// const __m256i *mask_hi - contains mask for taking higher bits +// const __m256i *HK - contains hashed keys // input/output: -// __m128i *GH - contains GHASH. Will be overwritten in this function +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] */ -__INLINE void avx2_clmul_gcm(__m256i *GH, const __m256i *HK, const __m256i *HKeyKaratsuba, const __m256i *mask_lo, const __m256i *mask_hi) { - __m256i tmpX0, tmpX1, tmpX2; - - tmpX2 = _mm256_shuffle_epi32 (*GH, SHUFD_MASK); - // Karatsuba Method - tmpX1 = *GH; - tmpX2 = _mm256_xor_si256(tmpX2, *GH); - *GH = _mm256_clmulepi64_epi128(*GH, *HK, 0x00); - // Karatsuba Method - - tmpX1 = _mm256_clmulepi64_epi128(tmpX1, *HK, 0x11); - tmpX2 = _mm256_clmulepi64_epi128(tmpX2, *HKeyKaratsuba, 0x00); - tmpX2 = _mm256_xor_si256(tmpX2, *GH); - tmpX2 = _mm256_xor_si256(tmpX2, tmpX1); - tmpX0 = _mm256_shuffle_epi32 (tmpX2, SHUFD_MASK); - tmpX2 = tmpX0; - tmpX0 = _mm256_and_si256(tmpX0, *mask_hi); - tmpX2 = _mm256_and_si256(tmpX2, *mask_lo); - *GH = _mm256_xor_si256(*GH, tmpX0); - tmpX1 = _mm256_xor_si256(tmpX1, tmpX2); - - // first phase of the reduction - tmpX0 = *GH; - *GH = _mm256_slli_epi64 (*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX0); - *GH = _mm256_slli_epi64 (*GH, 5); - *GH = _mm256_xor_si256(*GH, tmpX0); - *GH = _mm256_slli_epi64 (*GH, 57); - tmpX2 = _mm256_shuffle_epi32(*GH, SHUFD_MASK); - *GH = tmpX2; - tmpX2 = _mm256_and_si256(tmpX2, *mask_lo); - *GH = _mm256_and_si256(*GH, *mask_hi); - *GH = _mm256_xor_si256(*GH, tmpX0); - tmpX1 = _mm256_xor_si256(tmpX1, tmpX2); - - // second phase of the reduction - tmpX2 = *GH; - *GH = _mm256_srli_epi64(*GH, 5); - *GH = _mm256_xor_si256(*GH, tmpX2); - *GH = _mm256_srli_epi64(*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX2); - *GH = _mm256_srli_epi64(*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX2); - 
*GH = _mm256_xor_si256(*GH, tmpX1); +__IPPCP_INLINE __m128i avx2_clmul_gcm16(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[7], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[7]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[7], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[7], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[6], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[2], HK[5], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[3], HK[4], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[4], HK[3], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[5], HK[2], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[6], HK[1], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[7], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); } /* -// aes_encoder_avx2vaes_sb is used for single block encryption +// avx2_clmul_gcm8 performs the hash calculation with 256-bit registers for 8 blocks +// GH order - 0, 1 | 2, 3 
| 4, 5 | 6, 7 +// HK order - 1, 0 | 3, 2 | 5, 4 | 7, 6 // input: -// const Ipp8u *in - contains data for encryprion -// const int Nr - contains number of the rounds -// const __m256i* keys - contains keys +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function // output: -// Ipp8u *out - stores encrypted data. +// __m128i GH[0] */ -__INLINE void aes_encoder_avx2vaes_sb(const Ipp8u *in, Ipp8u *out, const int Nr, const __m256i* keys) { - __m128i lo = _mm_loadu_si128((void*)in); - __m128i hi = _mm_setzero_si128(); - __m256i block = _mm256_setr_m128i(lo, hi); - block = _mm256_xor_si256(block, *keys); - for(int round = 1; round < Nr; round++) { - keys++; - block = _mm256_aesenc_epi128(block, *keys); - } - keys++; - block = _mm256_aesenclast_epi128(block, *keys); - _mm_storeu_si128((void*)out, _mm256_castsi256_si128(block)); +__IPPCP_INLINE __m128i avx2_clmul_gcm8(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[3], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[3]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[3], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[3], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[2], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[2], HK[1], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[3], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = 
_mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm4 performs the hash calculation with 256-bit registers for 4 blocks +// GH order - 0, 1 | 2, 3 +// HK order - 1, 0 | 3, 2 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm4(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[1], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[1]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[1], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[1], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm2 
performs the hash calculation with 256-bit registers for 2 blocks +// GH order - 0, 1 +// HK order - 1, 0 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm2(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[0], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[0]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm performs the hash calculation with 256-bit registers for 1 blocks +// GH order - 0 +// HK order - 0 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. 
Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[0], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[0]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm256_extractf128_si256(tmpX0, 0); + T1 = _mm256_extractf128_si256(tmpX5, 0); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); } #endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_avx2_vaes_decrypt.c b/sources/ippcp/pcpaes_avx2_vaes_decrypt.c index 81a21d22..583de7f2 100644 --- a/sources/ippcp/pcpaes_avx2_vaes_decrypt.c +++ b/sources/ippcp/pcpaes_avx2_vaes_decrypt.c @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * -* http://www.apache.org/licenses/LICENSE-2.0 +* http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -32,181 +32,555 @@ IPP_OWN_DEFN (void, AesGcmDec_vaes_avx2, (Ipp8u* pDst, const Ipp8u* pSrc, int le if (len < 256) { IppsAESSpec* pAES = AESGCM_CIPHER(pState); RijnCipher encoder = RIJ_ENCODER(pAES); + Ipp8u hkeys_old_order[48]; + + // put the hash keys in the correct order (hKey*t, (hKey*t)^2, (hKey*t)^4) + for (int i = 0; i < 32; i++) { + *(hkeys_old_order+i) = *(AESGCM_HKEY(pState)+i); // HKEY 0-32 + if (i < 16) + *(hkeys_old_order+i+32) = *(AESGCM_HKEY(pState)+i+48); // HKEY 32-48 + } + AesGcmDec_avx(pDst, pSrc, len, encoder, RIJ_NR(pAES), RIJ_EKEYS(pAES), AESGCM_GHASH(pState), - AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), AESGCM_HKEY(pState)); + AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), hkeys_old_order); + + // zeroizing + zeroize_256((Ipp32u*)hkeys_old_order, 12); } else { - const int nloop = len / STEP_SIZE; IppsRijndael128Spec* pAES = AESGCM_CIPHER(pState); Ipp8u* pCounter = AESGCM_COUNTER(pState); Ipp8u* pECounter = AESGCM_ECOUNTER(pState); - __m256i pCounter256, pCounter256_1, pECounter256, pECounter256_1; - __m256i block, block1, cipherText, cipherText_1, plainText, plainText_1; + __m256i pCounter256, pCounter256_1, pCounter256_2, pCounter256_3, pCounter256_4, pCounter256_5, pCounter256_6, pCounter256_7; + __m256i block, block1, block2, block3, block4, block5, block6, block7; + __m256i cipherText, cipherText_1, cipherText_2, cipherText_3, cipherText_4, cipherText_5, cipherText_6, cipherText_7; + __m256i plainText, plainText_1, plainText_2, plainText_3, plainText_4, plainText_5, plainText_6, plainText_7; + __m256i rpHash[8]; + __m256i HashKey[8]; + __m128i resultHash = _mm_setzero_si128(); + __m256i tmpKey; // setting temporary data for incremention + const __m256i 
increment1 = _mm256_loadu_si256((void*)_increment1); // increment by 1 const __m256i increment2 = _mm256_loadu_si256((void*)_increment2); // increment by 2 - const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 - const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); + const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 + const __m256i increment8 = _mm256_loadu_si256((void*)_increment8); // increment by 8 + const __m256i increment16 = _mm256_loadu_si256((void*)_increment16); // increment by 16 + const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); - // vectors are used to zeroizing - __m128i zero_128 = _mm_setzero_si128(); + // vector is used to zeroizing __m256i zero_256 = _mm256_setzero_si256(); - // loading keys from memory - __m256i rkeys[MAX_NK]; - __m128i tmp_keys_128; - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) { - tmp_keys_128 = _mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+i*16)); - rkeys[i] = _mm256_setr_m128i(tmp_keys_128, tmp_keys_128); + // setting some masks + const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); + const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); + + // loading counters from memory + __m128i lo = _mm_loadu_si128((void*)pCounter); + IncrementCounter32(pCounter); + __m128i hi = _mm_loadu_si128((void*)pCounter); + pCounter256_7 = _mm256_setr_m128i(lo, hi); + pCounter256 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_1 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_2 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_3 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_4 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_5 = pCounter256_7; + IncrementRegister256(pCounter256_7, 
increment2, shuffle_mask); + pCounter256_6 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + + lo = _mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); + hi = _mm_setzero_si128(); + rpHash[0] = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); + + // setting hash keys + Ipp8u *pkeys = AESGCM_HKEY(pState); + for (int i = 0; i < 8; i++) { + HashKey[i] = _mm256_setr_m128i(_mm_loadu_si128((void*)(pkeys+16)), _mm_loadu_si128((void*)pkeys)); + pkeys += 32; } - // skip extra calculations if plaintext less than 4 blocks - if (nloop) { - // loading counters from memory - __m128i lo, hi; - lo = _mm_loadu_si128((void*)pCounter); - IncrementCounter32(pCounter); - hi = _mm_loadu_si128((void*)pCounter); - pCounter256_1 = _mm256_setr_m128i(lo, hi); - pCounter256 = pCounter256_1; - IncrementRegister256(pCounter256_1, increment2, shuffle_mask); - - // setting some masks - const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); - const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); - const __m256i mask_lo_256 = _mm256_loadu_si256((void*)_mask_lo_256); - const __m256i mask_hi_256 = _mm256_loadu_si256((void*)_mask_hi_256); - - lo = _mm_loadu_si128((void*)AESGCM_GHASH(pState)); - hi = _mm_setzero_si128(); - __m256i rpHash0 = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); - __m256i rpHash1 = _mm256_setzero_si256(); - - // setting pre-calculated data for hash combining - Ipp8u *pkeys = AESGCM_HKEY(pState); - __m128i HashKey0 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey2 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey4 = _mm_loadu_si128((void*)pkeys); - - // setting pre-calculated data in correct order for Karatsuba method - __m256i HKey = _mm256_setr_m128i(HashKey4, HashKey4); - __m256i HKeyKaratsuba = _mm256_shuffle_epi32(HKey, SHUFD_MASK); - HKeyKaratsuba = _mm256_xor_si256(HKey, HKeyKaratsuba); - do { - // decrypt stage - block = 
_mm256_xor_si256(pCounter256, *rkeys); - block1 = _mm256_xor_si256(pCounter256_1, *rkeys); - block = _mm256_aesenc_epi128(block, *(rkeys+1)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+1)); - block = _mm256_aesenc_epi128(block, *(rkeys+2)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+2)); - block = _mm256_aesenc_epi128(block, *(rkeys+3)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+3)); - IncrementRegister256(pCounter256, increment4, shuffle_mask); - block = _mm256_aesenc_epi128(block, *(rkeys+4)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+4)); - block = _mm256_aesenc_epi128(block, *(rkeys+5)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+5)); - block = _mm256_aesenc_epi128(block, *(rkeys+6)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+6)); - block = _mm256_aesenc_epi128(block, *(rkeys+7)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+7)); - block = _mm256_aesenc_epi128(block, *(rkeys+8)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+8)); - block = _mm256_aesenc_epi128(block, *(rkeys+9)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+9)); - IncrementRegister256(pCounter256_1, increment4, shuffle_mask); - if (RIJ_NR(pAES) >= 12) { - block = _mm256_aesenc_epi128(block, *(rkeys+10)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+10)); - block = _mm256_aesenc_epi128(block, *(rkeys+11)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+11)); - if (RIJ_NR(pAES) >= 14) { - block = _mm256_aesenc_epi128(block, *(rkeys+12)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+12)); - block = _mm256_aesenc_epi128(block, *(rkeys+13)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+13)); - } + while(len >= 16*BLOCK_SIZE) { + // decrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + block4 = 
_mm256_xor_si256(pCounter256_4, tmpKey); + block5 = _mm256_xor_si256(pCounter256_5, tmpKey); + block6 = _mm256_xor_si256(pCounter256_6, tmpKey); + block7 = _mm256_xor_si256(pCounter256_7, tmpKey); + IncrementRegister256(pCounter256, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_1, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_2, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_3, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_4, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_5, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_6, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = 
_mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_7, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + 
block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); } - pECounter256 = _mm256_aesenclast_epi128(block, *(rkeys+RIJ_NR(pAES))); - pECounter256_1 = _mm256_aesenclast_epi128(block1, *(rkeys+RIJ_NR(pAES))); - - // set ciphertext - plainText = _mm256_loadu_si256((void*)pSrc); - cipherText = _mm256_xor_si256(plainText, pECounter256); - pSrc += HALF_STEP_SIZE; - plainText_1 = _mm256_loadu_si256((void*)pSrc); - cipherText_1 = _mm256_xor_si256(plainText_1, pECounter256_1); - pSrc += HALF_STEP_SIZE; - - // hash calculation stage - rpHash0 = _mm256_xor_si256(rpHash0, _mm256_shuffle_epi8(plainText, shuff_mask_256)); - _mm256_storeu_si256((void*)pDst, cipherText); - pDst += HALF_STEP_SIZE; - _mm256_storeu_si256((void*)pDst, cipherText_1); - pDst += HALF_STEP_SIZE; - rpHash1 = _mm256_xor_si256(rpHash1, _mm256_shuffle_epi8(plainText_1, shuff_mask_256)); - len -= STEP_SIZE; - if (len >= STEP_SIZE) { - avx2_clmul_gcm(&rpHash0, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); - avx2_clmul_gcm(&rpHash1, &HKey, 
&HKeyKaratsuba, &mask_lo_256, &mask_hi_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + block4 = _mm256_aesenclast_epi128(block4, tmpKey); + block5 = _mm256_aesenclast_epi128(block5, tmpKey); + block6 = _mm256_aesenclast_epi128(block6, tmpKey); + block7 = _mm256_aesenclast_epi128(block7, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + plainText_2 = _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)); + cipherText_2 = _mm256_xor_si256(plainText_2, block2); + plainText_3 = _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)); + cipherText_3 = _mm256_xor_si256(plainText_3, block3); + plainText_4 = _mm256_loadu_si256((void*)(pSrc+8*BLOCK_SIZE)); + cipherText_4 = _mm256_xor_si256(plainText_4, block4); + plainText_5 = _mm256_loadu_si256((void*)(pSrc+10*BLOCK_SIZE)); + cipherText_5 = _mm256_xor_si256(plainText_5, block5); + plainText_6 = _mm256_loadu_si256((void*)(pSrc+12*BLOCK_SIZE)); + cipherText_6 = _mm256_xor_si256(plainText_6, block6); + plainText_7 = _mm256_loadu_si256((void*)(pSrc+14*BLOCK_SIZE)); + cipherText_7 = _mm256_xor_si256(plainText_7, block7); + pSrc += 16*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + _mm256_storeu_si256((void*)(pDst+8*BLOCK_SIZE), cipherText_4); + _mm256_storeu_si256((void*)(pDst+10*BLOCK_SIZE), cipherText_5); + _mm256_storeu_si256((void*)(pDst+12*BLOCK_SIZE), cipherText_6); + 
_mm256_storeu_si256((void*)(pDst+14*BLOCK_SIZE), cipherText_7); + pDst += 16*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(plainText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(plainText_3, shuff_mask_256); + rpHash[4] = _mm256_shuffle_epi8(plainText_4, shuff_mask_256); + rpHash[5] = _mm256_shuffle_epi8(plainText_5, shuff_mask_256); + rpHash[6] = _mm256_shuffle_epi8(plainText_6, shuff_mask_256); + rpHash[7] = _mm256_shuffle_epi8(plainText_7, shuff_mask_256); + resultHash = avx2_clmul_gcm16(rpHash, HashKey); + + len -= 16*BLOCK_SIZE; + } // while(len >= 16*BLOCK_SIZE) + + if (len >= 8*BLOCK_SIZE) { + // decrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = 
_mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_1, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_2, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_3, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); } - } while(len >= STEP_SIZE); - - // loading temporary data to memory - _mm_storeu_si128((void*)pECounter, _mm256_extractf128_si256(pECounter256, 1)); - _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); - - // combine hash - __m128i GHash0 = _mm256_extractf128_si256(rpHash0, 0); - __m128i GHash1 = _mm256_extractf128_si256(rpHash0, 1); - __m128i GHash2 = _mm256_extractf128_si256(rpHash1, 0); - __m128i GHash3 = _mm256_extractf128_si256(rpHash1, 1); - - sse_clmul_gcm(&GHash0, &HashKey4); //GHash0 = GHash0 * (HashKey^4)<<1 mod poly - sse_clmul_gcm(&GHash1, &HashKey2); //GHash1 = GHash1 * (HashKey^2)<<1 mod poly - sse_clmul_gcm(&GHash2, &HashKey0); //GHash2 = GHash2 * (HashKey^1)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash1); - GHash3 = _mm_xor_si128(GHash3, GHash2); - - sse_clmul_gcm(&GHash3, &HashKey0); //GHash3 
= GHash3 * (HashKey)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash0); - GHash3 = _mm_shuffle_epi8(GHash3, shuff_mask_128); - _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), GHash3); - - // HKeys zeroizing - _mm_storeu_si128(&HashKey0, zero_128); - _mm_storeu_si128(&HashKey2, zero_128); - _mm_storeu_si128(&HashKey4, zero_128); - _mm256_storeu_si256(&HKey, zero_256); - _mm256_storeu_si256(&HKeyKaratsuba, zero_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + plainText_2 = _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)); + cipherText_2 = _mm256_xor_si256(plainText_2, block2); + plainText_3 = _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)); + cipherText_3 = _mm256_xor_si256(plainText_3, block3); + pSrc += 8*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + pDst += 8*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(plainText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(plainText_3, shuff_mask_256); + resultHash = avx2_clmul_gcm8(rpHash, HashKey); + + len -= 8*BLOCK_SIZE; + } //if (len >= 8*BLOCK_SIZE) + + if (len >= 4*BLOCK_SIZE) { + // decrypt stage + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256_1, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + pSrc += 4*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + pDst += 4*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + resultHash = avx2_clmul_gcm4(rpHash, HashKey); + len -= 4*BLOCK_SIZE; + } //if (len >= 4*BLOCK_SIZE) + + if (len >= 2*BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment2, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + resultHash = avx2_clmul_gcm2(rpHash, HashKey); 
+ len -= 2*BLOCK_SIZE; } - const Ipp8u* pHashedData = pSrc; - int hashedDataLen = len; + // encryption for the tail (1-3 block) + while (len >= BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment1, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // decryption for the tail (1-3 blocks) - while(len >= BLOCK_SIZE) { - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - XorBlock16(pSrc, pECounter, pDst); + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); pSrc += BLOCK_SIZE; + _mm_storeu_si128((void*)pDst, _mm256_castsi256_si128(cipherText)); pDst += BLOCK_SIZE; + // hash calculation stage + HashKey[0] = _mm256_setr_m128i(_mm_loadu_si128((void*)(AESGCM_HKEY(pState))), _mm_loadu_si128((void*)(AESGCM_HKEY(pState)))); + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + resultHash = avx2_clmul_gcm(rpHash, HashKey); len -= BLOCK_SIZE; - IncrementCounter32(pCounter); } - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - // hash calculation for the tail (1-3 blocks) - if (hashedDataLen >= BLOCK_SIZE) - AesGcmAuth_avx(AESGCM_GHASH(pState), pHashedData, hashedDataLen, AESGCM_HKEY(pState), AesGcmConst_table); + //decrypt the remainder + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // keys zeroizing - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) - _mm256_storeu_si256((rkeys+i), zero_256); - _mm_storeu_si128(&tmp_keys_128, zero_128); - } -} + // loand data to the memory + _mm_storeu_si128((void*)pECounter, _mm256_castsi256_si128(block)); + _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); + resultHash = _mm_shuffle_epi8(resultHash, shuff_mask_128); + _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), resultHash); + // HKeys zeroizing + for (int i = 0; i < 8; i++) + _mm256_storeu_si256((HashKey+i), zero_256); + tmpKey = _mm256_setzero_si256(); + } // if (len < 256) +} #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_avx2_vaes_encrypt.c b/sources/ippcp/pcpaes_avx2_vaes_encrypt.c index 8c3b93de..2c6f8bf1 100644 --- a/sources/ippcp/pcpaes_avx2_vaes_encrypt.c +++ b/sources/ippcp/pcpaes_avx2_vaes_encrypt.c @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * -* http://www.apache.org/licenses/LICENSE-2.0 +* http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -32,179 +32,546 @@ IPP_OWN_DEFN (void, AesGcmEnc_vaes_avx2, (Ipp8u* pDst, const Ipp8u* pSrc, int le if (len < 256) { IppsAESSpec* pAES = AESGCM_CIPHER(pState); RijnCipher encoder = RIJ_ENCODER(pAES); + Ipp8u hkeys_old_order[48]; + + // put the hash keys in the correct order (hKey*t, (hKey*t)^2, (hKey*t)^4) + for (int i = 0; i < 32; i++) { + *(hkeys_old_order+i) = *(AESGCM_HKEY(pState)+i); // HKEY 0-32 + if (i < 16) + *(hkeys_old_order+i+32) = *(AESGCM_HKEY(pState)+i+48); // HKEY 32-48 + } + AesGcmEnc_avx(pDst, pSrc, len, encoder, RIJ_NR(pAES), RIJ_EKEYS(pAES), AESGCM_GHASH(pState), - AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), AESGCM_HKEY(pState)); + AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), hkeys_old_order); + + // zeroizing + zeroize_256((Ipp32u*)hkeys_old_order, 12); } else { - const int nloop = len / STEP_SIZE; IppsRijndael128Spec* pAES = AESGCM_CIPHER(pState); Ipp8u* pCounter = AESGCM_COUNTER(pState); Ipp8u* pECounter = AESGCM_ECOUNTER(pState); - __m256i pCounter256, pCounter256_1, pECounter256, pECounter256_1; - __m256i block, block1, cipherText, cipherText_1; + __m256i pCounter256, pCounter256_1, pCounter256_2, pCounter256_3, pCounter256_4, pCounter256_5, pCounter256_6, pCounter256_7; + __m256i block, block1, block2, block3, block4, block5, block6, block7; + __m256i cipherText, cipherText_1, cipherText_2, cipherText_3, cipherText_4, cipherText_5, cipherText_6, cipherText_7; + __m256i rpHash[8]; + __m256i HashKey[8]; + __m128i resultHash = _mm_setzero_si128(); + __m256i tmpKey; // setting temporary data for incremention + const __m256i increment1 = _mm256_loadu_si256((void*)_increment1); // increment by 1 const __m256i increment2 = _mm256_loadu_si256((void*)_increment2); // 
increment by 2 - const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 - const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); + const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 + const __m256i increment8 = _mm256_loadu_si256((void*)_increment8); // increment by 8 + const __m256i increment16 = _mm256_loadu_si256((void*)_increment16); // increment by 16 + const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); - // vectors are used to zeroizing - __m128i zero_128 = _mm_setzero_si128(); + // vector is used to zeroizing __m256i zero_256 = _mm256_setzero_si256(); - // loading keys from memory - __m256i rkeys[MAX_NK]; - __m128i tmp_keys_128; - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) { - tmp_keys_128 = _mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+i*16)); - rkeys[i] = _mm256_setr_m128i(tmp_keys_128, tmp_keys_128); + // setting some masks + const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); + const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); + + // loading counters from memory + __m128i lo = _mm_loadu_si128((void*)pCounter); + IncrementCounter32(pCounter); + __m128i hi = _mm_loadu_si128((void*)pCounter); + pCounter256_7 = _mm256_setr_m128i(lo, hi); + pCounter256 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_1 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_2 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_3 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_4 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_5 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_6 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + + lo = 
_mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); + hi = _mm_setzero_si128(); + rpHash[0] = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); + + // setting hash keys + Ipp8u *pkeys = AESGCM_HKEY(pState); + for (int i = 0; i < 8; i++) { + HashKey[i] = _mm256_setr_m128i(_mm_loadu_si128((void*)(pkeys+16)), _mm_loadu_si128((void*)pkeys)); + pkeys += 32; } - // skip extra calculations if plaintext less than 4 blocks - if (nloop) { - // loading counters from memory - __m128i lo, hi; - lo = _mm_loadu_si128((void*)pCounter); - IncrementCounter32(pCounter); - hi = _mm_loadu_si128((void*)pCounter); - pCounter256_1 = _mm256_setr_m128i(lo, hi); - pCounter256 = pCounter256_1; - IncrementRegister256(pCounter256_1, increment2, shuffle_mask); - - // setting some masks - const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); - const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); - const __m256i mask_lo_256 = _mm256_loadu_si256((void*)_mask_lo_256); - const __m256i mask_hi_256 = _mm256_loadu_si256((void*)_mask_hi_256); - - lo = _mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); - hi = _mm_setzero_si128(); - __m256i rpHash0 = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); - __m256i rpHash1 = _mm256_setzero_si256(); - - // setting pre-calculated data for hash combining - Ipp8u *pkeys = AESGCM_HKEY(pState); - __m128i HashKey0 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey2 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey4 = _mm_loadu_si128((void*)pkeys); - - // setting pre-calculated data in correct order for Karatsuba method - __m256i HKey = _mm256_setr_m128i(HashKey4, HashKey4); - __m256i HKeyKaratsuba = _mm256_shuffle_epi32(HKey, SHUFD_MASK); - HKeyKaratsuba = _mm256_xor_si256(HKey, HKeyKaratsuba); - do { - // encrypt stage - block = _mm256_xor_si256(pCounter256, *rkeys); - block1 = _mm256_xor_si256(pCounter256_1, *rkeys); - block = _mm256_aesenc_epi128(block, *(rkeys+1)); - block1 = 
_mm256_aesenc_epi128(block1, *(rkeys+1)); - block = _mm256_aesenc_epi128(block, *(rkeys+2)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+2)); - block = _mm256_aesenc_epi128(block, *(rkeys+3)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+3)); - IncrementRegister256(pCounter256, increment4, shuffle_mask); - block = _mm256_aesenc_epi128(block, *(rkeys+4)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+4)); - block = _mm256_aesenc_epi128(block, *(rkeys+5)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+5)); - block = _mm256_aesenc_epi128(block, *(rkeys+6)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+6)); - block = _mm256_aesenc_epi128(block, *(rkeys+7)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+7)); - block = _mm256_aesenc_epi128(block, *(rkeys+8)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+8)); - block = _mm256_aesenc_epi128(block, *(rkeys+9)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+9)); - IncrementRegister256(pCounter256_1, increment4, shuffle_mask); - if (RIJ_NR(pAES) >= 12) { - block = _mm256_aesenc_epi128(block, *(rkeys+10)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+10)); - block = _mm256_aesenc_epi128(block, *(rkeys+11)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+11)); - if (RIJ_NR(pAES) >= 14) { - block = _mm256_aesenc_epi128(block, *(rkeys+12)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+12)); - block = _mm256_aesenc_epi128(block, *(rkeys+13)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+13)); - } - } - pECounter256 = _mm256_aesenclast_epi128(block, *(rkeys+RIJ_NR(pAES))); - pECounter256_1 = _mm256_aesenclast_epi128(block1, *(rkeys+RIJ_NR(pAES))); - - // set ciphertext - cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), pECounter256); - pSrc += HALF_STEP_SIZE; - cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), pECounter256_1); - pSrc += HALF_STEP_SIZE; - - // hash calculation stage - rpHash0 = _mm256_xor_si256(rpHash0, _mm256_shuffle_epi8(cipherText, shuff_mask_256)); - 
_mm256_storeu_si256((void*)pDst, cipherText); - pDst += HALF_STEP_SIZE; - _mm256_storeu_si256((void*)pDst, cipherText_1); - pDst += HALF_STEP_SIZE; - rpHash1 = _mm256_xor_si256(rpHash1, _mm256_shuffle_epi8(cipherText_1, shuff_mask_256)); - len -= STEP_SIZE; - if (len >= STEP_SIZE) { - avx2_clmul_gcm(&rpHash0, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); - avx2_clmul_gcm(&rpHash1, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); + while(len >= 16*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + block4 = _mm256_xor_si256(pCounter256_4, tmpKey); + block5 = _mm256_xor_si256(pCounter256_5, tmpKey); + block6 = _mm256_xor_si256(pCounter256_6, tmpKey); + block7 = _mm256_xor_si256(pCounter256_7, tmpKey); + IncrementRegister256(pCounter256, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_1, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = 
_mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_2, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_3, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_4, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_5, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = 
_mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_6, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_7, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, 
tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + block4 = _mm256_aesenclast_epi128(block4, tmpKey); + block5 = _mm256_aesenclast_epi128(block5, tmpKey); + block6 = _mm256_aesenclast_epi128(block6, tmpKey); + block7 = _mm256_aesenclast_epi128(block7, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)), block1); + cipherText_2 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)), block2); + cipherText_3 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)), block3); + cipherText_4 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+8*BLOCK_SIZE)), block4); + cipherText_5 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+10*BLOCK_SIZE)), block5); + cipherText_6 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+12*BLOCK_SIZE)), block6); + cipherText_7 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+14*BLOCK_SIZE)), block7); + pSrc += 16*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + _mm256_storeu_si256((void*)(pDst+8*BLOCK_SIZE), cipherText_4); + _mm256_storeu_si256((void*)(pDst+10*BLOCK_SIZE), cipherText_5); + _mm256_storeu_si256((void*)(pDst+12*BLOCK_SIZE), cipherText_6); + _mm256_storeu_si256((void*)(pDst+14*BLOCK_SIZE), cipherText_7); + pDst += 16*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(cipherText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(cipherText_3, shuff_mask_256); + 
rpHash[4] = _mm256_shuffle_epi8(cipherText_4, shuff_mask_256); + rpHash[5] = _mm256_shuffle_epi8(cipherText_5, shuff_mask_256); + rpHash[6] = _mm256_shuffle_epi8(cipherText_6, shuff_mask_256); + rpHash[7] = _mm256_shuffle_epi8(cipherText_7, shuff_mask_256); + resultHash = avx2_clmul_gcm16(rpHash, HashKey); + + len -= 16*BLOCK_SIZE; + } // while(len >= 16*BLOCK_SIZE) + + if (len >= 8*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_1, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_2, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_3, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = 
_mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); } - } while(len >= STEP_SIZE); - - // loading temporary data to memory - _mm_storeu_si128((void*)pECounter, _mm256_extractf128_si256(pECounter256, 1)); - _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); - - // combine hash - __m128i GHash0 = _mm256_extractf128_si256(rpHash0, 0); - __m128i GHash1 = _mm256_extractf128_si256(rpHash0, 1); - __m128i GHash2 = _mm256_extractf128_si256(rpHash1, 0); - __m128i GHash3 = _mm256_extractf128_si256(rpHash1, 1); - - sse_clmul_gcm(&GHash0, &HashKey4); //GHash0 = GHash0 * (HashKey^4)<<1 mod poly - sse_clmul_gcm(&GHash1, &HashKey2); //GHash1 = GHash1 * (HashKey^2)<<1 mod poly - sse_clmul_gcm(&GHash2, &HashKey0); //GHash2 = GHash2 * (HashKey^1)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash1); - GHash3 = _mm_xor_si128(GHash3, GHash2); - - sse_clmul_gcm(&GHash3, &HashKey0); //GHash3 = GHash3 * (HashKey)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash0); - GHash3 = _mm_shuffle_epi8(GHash3, shuff_mask_128); - _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), GHash3); - - // HKeys zeroizing - _mm_storeu_si128(&HashKey0, zero_128); - _mm_storeu_si128(&HashKey2, zero_128); - _mm_storeu_si128(&HashKey4, zero_128); - _mm256_storeu_si256(&HKey, zero_256); - _mm256_storeu_si256(&HKeyKaratsuba, 
zero_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block1); + pSrc += 2*BLOCK_SIZE; + cipherText_2 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block2); + pSrc += 2*BLOCK_SIZE; + cipherText_3 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block3); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_1); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_2); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_3); + pDst += 2*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(cipherText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(cipherText_3, shuff_mask_256); + resultHash = avx2_clmul_gcm8(rpHash, HashKey); + + len -= 8*BLOCK_SIZE; + } //if (len >= 8*BLOCK_SIZE) + + if (len >= 4*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256_1, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block1); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_1); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + resultHash = avx2_clmul_gcm4(rpHash, HashKey); + len -= 4*BLOCK_SIZE; + } //if (len >= 4*BLOCK_SIZE) + + if (len >= 2*BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment2, shuffle_mask); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + resultHash = avx2_clmul_gcm2(rpHash, HashKey); + len -= 2*BLOCK_SIZE; } - Ipp8u* pHashedData = pDst; - int hashedDataLen = len; + // encryption for the tail (1-3 blocks) + while (len >= BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment1, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // encryption for the tail (1-3 blocks) - while(len >= BLOCK_SIZE) { - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - XorBlock16(pSrc, pECounter, pDst); + // set ciphertext + cipherText = _mm256_xor_si256(_mm256_loadu_si256((void*)pSrc), block); pSrc += BLOCK_SIZE; + _mm_storeu_si128((void*)pDst, _mm256_castsi256_si128(cipherText)); pDst += BLOCK_SIZE; + // 
hash calculation stage + HashKey[0] = _mm256_setr_m128i(_mm_loadu_si128((void*)(AESGCM_HKEY(pState))), _mm_loadu_si128((void*)(AESGCM_HKEY(pState)))); + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + resultHash = avx2_clmul_gcm(rpHash, HashKey); len -= BLOCK_SIZE; - IncrementCounter32(pCounter); } - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - // hash calculation for the tail (1-3 blocks) - if (hashedDataLen >= BLOCK_SIZE) - AesGcmAuth_avx(AESGCM_GHASH(pState), pHashedData, hashedDataLen, AESGCM_HKEY(pState), AesGcmConst_table); + //encrypt the remainder + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // keys zeroizing - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) - _mm256_storeu_si256((rkeys+i), zero_256); - _mm_storeu_si128(&tmp_keys_128, zero_128); - } -} + // load data to the memory + _mm_storeu_si128((void*)pECounter, _mm256_castsi256_si128(block)); + _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); + resultHash = _mm_shuffle_epi8(resultHash, shuff_mask_128); + _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), resultHash); + // HKeys zeroizing + for (int i = 0; i < 8; i++) + _mm256_storeu_si256((HashKey+i), zero_256); + tmpKey = _mm256_setzero_si256(); + } // if (len < 256) +} #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_ccmstart.c b/sources/ippcp/pcpaes_ccmstart.c index 9eebde71..bde0f5dd 100644 --- a/sources/ippcp/pcpaes_ccmstart.c +++ b/sources/ippcp/pcpaes_ccmstart.c @@ -16,7 +16,7 @@ /* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. -// +// // Context: // ippsAES_CCMStart() // @@ -34,7 +34,7 @@ /*F* // Name: ippsAES_CCMStart // -// Purpose: Start the process (encryption+generation) or (decryption+veryfication). +// Purpose: Start the process (encryption+generation) or (decryption+verification). 
// // Returns: Reason: // ippStsNullPtrErr pState == NULL diff --git a/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c b/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c index c3e94a50..a29a2528 100644 --- a/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c +++ b/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c @@ -37,14 +37,14 @@ #pragma warning(disable: 4310) // zmmintrin.h bug: truncation of constant value #endif -__INLINE Ipp64u broadcast_16to64(Ipp16u mask16) +__IPPCP_INLINE Ipp64u broadcast_16to64(Ipp16u mask16) { Ipp64u mask64 = (Ipp64u)mask16; mask64 = (mask64 << 48) | (mask64 << 32) | (mask64 << 16) | mask64; return mask64; } -__INLINE __m512i getInputBlocks(__m128i * const currentState, const __m512i * const pCipherBlocks, __mmask16 blocksCompressMask) +__IPPCP_INLINE __m512i getInputBlocks(__m128i * const currentState, const __m512i * const pCipherBlocks, __mmask16 blocksCompressMask) { // extract 128-bit cipher blocks __m128i c0 = _mm512_extracti64x2_epi64(*pCipherBlocks, 0); diff --git a/sources/ippcp/pcpaes_cmac_stuff.h b/sources/ippcp/pcpaes_cmac_stuff.h index 13f26813..a2eaba70 100644 --- a/sources/ippcp/pcpaes_cmac_stuff.h +++ b/sources/ippcp/pcpaes_cmac_stuff.h @@ -15,11 +15,11 @@ *************************************************************************/ /* -// +// // Purpose: // Cryptography Primitive. // AES-CMAC Functions -// +// // Contents: // init() // @@ -34,7 +34,7 @@ #if !defined(_PCP_AES_CMAC_STUFF_H_) #define _PCP_AES_CMAC_STUFF_H_ -__INLINE int cpSizeofCtx_AESCMAC(void) +__IPPCP_INLINE int cpSizeofCtx_AESCMAC(void) { return sizeof(IppsAES_CMACState); } diff --git a/sources/ippcp/pcpaes_cmacupdate.c b/sources/ippcp/pcpaes_cmacupdate.c index b21266c6..d18e2252 100644 --- a/sources/ippcp/pcpaes_cmacupdate.c +++ b/sources/ippcp/pcpaes_cmacupdate.c @@ -15,11 +15,11 @@ *************************************************************************/ /* -// +// // Purpose: // Cryptography Primitive. 
// AES-CMAC Functions -// +// // Contents: // ippsAES_CMACUpdate() // @@ -181,12 +181,12 @@ IPPFUN(IppStatus, ippsAES_CMACUpdate,(const Ipp8u* pSrc, int len, IppsAES_CMACSt } /* - // remaind + // remainder */ if(len) { /* workaround to avoid false positive stringop-overflow error on gcc10.1 and gcc11.1 */ len = ( IPP_MIN(len, MBS_RIJ128) ); - + CopyBlock(pSrc, (Ipp8u*)(&CMAC_BUFF(pState)), len); /* update internal buffer filling */ CMAC_INDX(pState) += len; diff --git a/sources/ippcp/pcpaes_ctr_process.h b/sources/ippcp/pcpaes_ctr_process.h index 445fda11..28f44ee1 100644 --- a/sources/ippcp/pcpaes_ctr_process.h +++ b/sources/ippcp/pcpaes_ctr_process.h @@ -61,7 +61,7 @@ // counter will updated on return // */ -__INLINE void MaskCounter128(Ipp8u* pMaskIV, int ctrBtSize) +__IPPCP_INLINE void MaskCounter128(Ipp8u* pMaskIV, int ctrBtSize) { /* construct ctr mask */ int maskPosition = (MBS_RIJ128*8-ctrBtSize)/8; @@ -99,15 +99,15 @@ IppStatus cpProcessAES_ctr(const Ipp8u* pSrc, Ipp8u* pDst, int dataLen, if(ctrNumBitSize < (8 * (int)sizeof(int) - 5)) { /* - // dataLen is int, and it is always positive - // data blocks number compute from dataLen - // by dividing it to MBS_RIJ128 = 16 - // and additing 1 if dataLen % 16 != 0 - // so if ctrNumBitSize >= 8 * sizeof(int) - 5 - // function can process data with any possible - // passed dataLen without counter overflow + // dataLen is int, and it is always positive + // data blocks number compute from dataLen + // by dividing it to MBS_RIJ128 = 16 + // and adding 1 if dataLen % 16 != 0 + // so if ctrNumBitSize >= 8 * sizeof(int) - 5 + // function can process data with any possible + // passed dataLen without counter overflow */ - + int dataBlocksNum = dataLen >> 4; if(dataLen & 15){ dataBlocksNum++; diff --git a/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c b/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c index 7dd00820..eb60083f 100644 --- a/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c +++ 
b/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c @@ -54,7 +54,7 @@ static __ALIGN64 Ipp64u nextIncLoMask[] = { 0x0, 0x4, 0x0, 0x4, 0x0, 0x4, 0x0, static __ALIGN64 Ipp64u incLoByOneMask[] = { 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1 }; static __ALIGN64 Ipp64u incHiByOneMask[] = { 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0 }; -__INLINE __m512i adcLo_epi64(__m512i a, __m512i b) +__IPPCP_INLINE __m512i adcLo_epi64(__m512i a, __m512i b) { a = _mm512_add_epi64(a, b); // check overflow in each low 64-bit of 128-bit numbers @@ -65,7 +65,7 @@ __INLINE __m512i adcLo_epi64(__m512i a, __m512i b) return a; } -__INLINE __m512i applyNonce(__m512i a, __m512i ctrBitMask, __m512i templateCtr) +__IPPCP_INLINE __m512i applyNonce(__m512i a, __m512i ctrBitMask, __m512i templateCtr) { a = _mm512_shuffle_epi8(a, M512(swapBytes)); a = _mm512_and_epi64(a, ctrBitMask); diff --git a/sources/ippcp/pcpaes_gcm_internal_func.c b/sources/ippcp/pcpaes_gcm_internal_func.c new file mode 100644 index 00000000..43ee84eb --- /dev/null +++ b/sources/ippcp/pcpaes_gcm_internal_func.c @@ -0,0 +1,257 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* +// +// Purpose: +// Cryptography Primitive. 
+// * Initialization functions for internal methods and pointers inside AES-GCM context; +// * AES-GCM encryption kernels with the conditional noise injections mechanism; +// +*/ + +#include "pcpaes_gcm_internal_func.h" +#include "aes_gcm_avx512.h" +#include "owncp.h" +#include "pcpaesm.h" +#include "pcptool.h" + +#if (_IPP32E >= _IPP32E_K0) +#include "pcpaesauthgcm_avx512.h" +#else +#include "pcpaesauthgcm.h" +#endif /* #if(_IPP32E>=_IPP32E_K0) */ + +/* + * This function dispatches to the right internal methods and sets pointers to them inside the AES-GCM state. + */ +IPP_OWN_DEFN(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pState, Ipp64u keyByteLen)) +{ +#if (_IPP32E >= _IPP32E_K0) + if (IsFeatureEnabled(ippCPUID_AVX512VAES) && IsFeatureEnabled(ippCPUID_AVX512VCLMUL)) { + switch (keyByteLen) { + case 16: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_vaes_avx512; + break; + case 24: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_vaes_avx512; + break; + case 32: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_vaes_avx512; + break; + } + + AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_vaes512; + AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_vaes512; + AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_vaes512; + AES_GCM_GMUL(pState) = aes_gcm_gmult_vaes512; + } else { + switch (keyByteLen) { + case 16: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_avx512; + break; + case 24: + 
AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_avx512; + break; + case 32: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_avx512; + break; + } + + AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_avx512; + AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_avx512; + AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_avx512; + AES_GCM_GMUL(pState) = aes_gcm_gmult_avx512; + } +#else + IPP_UNREFERENCED_PARAMETER(keyByteLen); + + /* set up: + // - ghash function + // - authentication function + */ + AESGCM_HASH(pState) = AesGcmMulGcm_table2K_ct; // AesGcmMulGcm_table2K; + AESGCM_AUTH(pState) = AesGcmAuth_table2K_ct; // AesGcmAuth_table2K; + AESGCM_ENC(pState) = wrpAesGcmEnc_table2K; + AESGCM_DEC(pState) = wrpAesGcmDec_table2K; + +#if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) +// the dead code that currently is unused +//#if (_IPP32E >= _IPP32E_K0) +// if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { +// AESGCM_HASH(pState) = AesGcmMulGcm_vaes; +// AESGCM_AUTH(pState) = AesGcmAuth_vaes; +// AESGCM_ENC(pState) = AesGcmEnc_vaes; +// AESGCM_DEC(pState) = AesGcmDec_vaes; +// } else +//#endif /* #if(_IPP32E>=_IPP32E_K0) */ + if (IsFeatureEnabled(ippCPUID_AES | ippCPUID_CLMUL)) { + AESGCM_HASH(pState) = AesGcmMulGcm_avx; + AESGCM_AUTH(pState) = AesGcmAuth_avx; + AESGCM_ENC(pState) = wrpAesGcmEnc_avx; + AESGCM_DEC(pState) = wrpAesGcmDec_avx; + } +#if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) + if (IsFeatureEnabled(ippCPUID_AVX2VAES | ippCPUID_AVX2VCLMUL)) { + AESGCM_HASH(pState) = AesGcmMulGcm_avx; + AESGCM_AUTH(pState) = AesGcmAuth_avx; + AESGCM_ENC(pState) = AesGcmEnc_vaes_avx2; + AESGCM_DEC(pState) = AesGcmDec_vaes_avx2; + } +#endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ +#endif /* #if(_IPP>=_IPP_P8) || 
(_IPP32E>=_IPP32E_Y8) */ + +#endif /* #if(_IPP32E>=_IPP32E_K0) */ +} + +/*! + * This function computes AES-GCM encryption kernel with the conditional noise injections mechanism (Mistletoe3 + * attack mitigation). + * + * Parameters: + * \param[in] pSrc Pointer to plaintext. + * \param[in] pDst Pointer to ciphertext. + * \param[in] ptxt_len Length of the plaintext in bytes. + * \param[in] pState Pointer to the AES-GCM context. + */ +IPP_OWN_DEFN(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, + IppsAES_GCMState* pState)) +{ +/* Identify the encryption method. It's different for different platforms */ +#if(_IPP32E>=_IPP32E_K0) + EncryptUpdate_ encFunc = AES_GCM_ENCRYPT_UPDATE(pState); +#else + Encrypt_ encFunc = AESGCM_ENC(pState); +#endif + +#if (_AES_PROB_NOISE == _FEATURE_ON_) + /* Mistletoe3 mitigation */ + cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); + if (AES_NOISE_LEVEL(params) > 0) { + /* Number of bytes allowed for operation without adding noise */ + int chunk_size; + /* Number of bytes remaining for operation */ + int remaining_size = ptxt_len; + + while (remaining_size > 0) { + /* How many bytes to encrypt in this operation */ + chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? 
+ MISTLETOE3_MAX_CHUNK_SIZE : + remaining_size; + + #if(_IPP32E>=_IPP32E_K0) + encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)chunk_size); + #else + encFunc(pDst, pSrc, chunk_size, pState); + #endif + + cpAESRandomNoise(NULL, + MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), + MISTLETOE3_NOISE_RATE, + &AES_NOISE_RAND(params)); + + pSrc += chunk_size; + pDst += chunk_size; + remaining_size -= chunk_size; + } + } else +#endif + { /* Process without noise injection */ + #if(_IPP32E>=_IPP32E_K0) + encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)ptxt_len); + #else + encFunc(pDst, pSrc, ptxt_len, pState); + #endif + } +} + + +/*! + * This function computes AES-GCM decryption kernel with the conditional noise injections mechanism (Mistletoe3 + * attack mitigation). + * + * Parameters: + * \param[in] pSrc Pointer to ciphertext. + * \param[in] pDst Pointer to deciphered text. + * \param[in] ctxt_len Length of the ciphertext in bytes. + * \param[in] pState Pointer to the AES-GCM context. + */ +IPP_OWN_DEFN(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ctxt_len, + IppsAES_GCMState* pState)) +{ +/* Identify the decryption method. It's different for different platforms */ +#if(_IPP32E>=_IPP32E_K0) + DecryptUpdate_ decFunc = AES_GCM_DECRYPT_UPDATE(pState); +#else + Decrypt_ decFunc = AESGCM_DEC(pState); +#endif + +#if (_AES_PROB_NOISE == _FEATURE_ON_) + /* Mistletoe3 mitigation */ + cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); + if (AES_NOISE_LEVEL(params) > 0) { + /* Number of bytes allowed for operation without adding noise */ + int chunk_size; + /* Number of bytes remaining for operation */ + int remaining_size = ctxt_len; + + while (remaining_size > 0) { + /* How many bytes to decrypt in this operation */ + chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? 
+ MISTLETOE3_MAX_CHUNK_SIZE : + remaining_size; + + #if(_IPP32E>=_IPP32E_K0) + decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)chunk_size); + #else + decFunc(pDst, pSrc, chunk_size, pState); + #endif + + cpAESRandomNoise(NULL, + MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), + MISTLETOE3_NOISE_RATE, + &AES_NOISE_RAND(params)); + + pSrc += chunk_size; + pDst += chunk_size; + remaining_size -= chunk_size; + } + } else +#endif + { /* Process without noise injection */ + #if(_IPP32E>=_IPP32E_K0) + decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)ctxt_len); + #else + decFunc(pDst, pSrc, ctxt_len, pState); + #endif + } +} diff --git a/sources/ippcp/pcpaes_gcm_internal_func.h b/sources/ippcp/pcpaes_gcm_internal_func.h new file mode 100644 index 00000000..2bd5c627 --- /dev/null +++ b/sources/ippcp/pcpaes_gcm_internal_func.h @@ -0,0 +1,41 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* +// +// Purpose: +// Cryptography Primitive. 
+// Initialization functions for internal methods and pointers inside AES-GCM context; +// AES-GCM encryption kernels with the conditional noise injections mechanism; +// +*/ + +#if !defined(_PCP_AES_GCM_INTERNAL_FUNC_H) +#define _PCP_AES_GCM_INTERNAL_FUNC_H + +#include "owndefs.h" +#include "pcpaes_internal_func.h" + +#define cpAesGCM_setup_ptrs_and_methods OWNAPI(cpAesGCM_setup_ptrs_and_methods) +IPP_OWN_DECL(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pCtx, Ipp64u keyByteLen)) + +#define condNoisedGCMEncryption OWNAPI(condNoisedGCMEncryption) +IPP_OWN_DECL(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) + +#define condNoisedGCMDecryption OWNAPI(condNoisedGCMDecryption) +IPP_OWN_DECL(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) + +#endif /* _PCP_AES_GCM_INTERNAL_FUNC_H */ diff --git a/sources/ippcp/pcpaes_gcm_vaes512.h b/sources/ippcp/pcpaes_gcm_vaes512.h index dc98428c..de551d98 100644 --- a/sources/ippcp/pcpaes_gcm_vaes512.h +++ b/sources/ippcp/pcpaes_gcm_vaes512.h @@ -59,7 +59,7 @@ static __ALIGN64 Ipp8u swapBytes[] = { * * NB: make sure unused parts of input registers are zeroed to avoid issues with further horizontal XOR. 
*/ -__INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A2 A1 A0 */ +__IPPCP_INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A2 A1 A0 */ const __m512i * const pHKeys, /* B3 B2 B1 B0 */ const __m512i * const pHKeysKaratsuba, /* precomputed (b1i^b0i) */ __m512i * const pH, @@ -77,7 +77,7 @@ __INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A /* The function performs horizontal XOR for 4 128-bit values in 512-bit register 128-bit result value saved in the low part of the 512-bit register */ -__INLINE void HXor4x128(const __m512i * const zmm, +__IPPCP_INLINE void HXor4x128(const __m512i * const zmm, __m128i * const xmm) { __m256i ymm; @@ -92,7 +92,7 @@ __INLINE void HXor4x128(const __m512i * const zmm, /* The function performs Montgomery reduction of 256-bit polynomial to 128-bit one with irreducible polynomial */ -__INLINE void ReducePoly2x128(const __m128i * const pHI, +__IPPCP_INLINE void ReducePoly2x128(const __m128i * const pHI, const __m128i * const pLO, __m128i * const result) { @@ -114,7 +114,7 @@ __INLINE void ReducePoly2x128(const __m128i * const pHI, } /* The function aggregates partial products of Karatsuba multiplication into final ghash value */ -__INLINE void AggregateKaratsubaPartialProducts(const __m512i * const pH, +__IPPCP_INLINE void AggregateKaratsubaPartialProducts(const __m512i * const pH, const __m512i * const pM, const __m512i * const pL, __m128i * const result) diff --git a/sources/ippcp/pcpaes_gcmdecrypt.c b/sources/ippcp/pcpaes_gcmdecrypt.c index 57f05922..595778b4 100644 --- a/sources/ippcp/pcpaes_gcmdecrypt.c +++ b/sources/ippcp/pcpaes_gcmdecrypt.c @@ -29,7 +29,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" diff --git a/sources/ippcp/pcpaes_gcmencrypt.c b/sources/ippcp/pcpaes_gcmencrypt.c index 626e3df6..c37b6799 100644 --- 
a/sources/ippcp/pcpaes_gcmencrypt.c +++ b/sources/ippcp/pcpaes_gcmencrypt.c @@ -29,7 +29,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" diff --git a/sources/ippcp/pcpaes_gcminit.c b/sources/ippcp/pcpaes_gcminit.c index 586b6236..fd41a444 100644 --- a/sources/ippcp/pcpaes_gcminit.c +++ b/sources/ippcp/pcpaes_gcminit.c @@ -30,7 +30,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" @@ -89,7 +89,7 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta Ipp8u zeroKey[32] = {0}; const Ipp8u* pActualKey = pKey? pKey : zeroKey; - + #if (_AES_PROB_NOISE == _FEATURE_ON_) /* Reset AES noise parameters */ cpAESNoiseParams *params = (cpAESNoiseParams *)&AESGCM_NOISE_PARAMS(pState); @@ -132,7 +132,7 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta } #else - + /* init cipher */ { IppStatus sts = ippsAESInit(pKey, keyLen, AESGCM_CIPHER(pState), cpSizeofCtx_AES()); @@ -155,24 +155,24 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta #endif } + #if (_IPP >=_IPP_H9) || (_IPP32E>=_IPP32E_L9) + if (IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) { + AesGcmPrecompute_avx2_vaes(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); + } + else + #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ + #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) - // the dead code that currently is unused - //#if(_IPP32E>=_IPP32E_K0) - //if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { - // /* pre-compute hKey<<1, (hKey<<1)^2, (hKey<<1)^3, ... 
, (hKey<<1)^15 and corresponding - // Karatsuba constant multipliers for aggregated reduction */ - // AesGcmPrecompute_vaes(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); - //} - //else - //#endif /* #if(_IPP32E>=_IPP32E_K0) */ - if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL) || IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) { - /* pre-compute reflect(hkey) and hKey<<1, (hKey<<1)^2 and (hKey<<1)^4 powers of hKey */ - AesGcmPrecompute_avx(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); - } - else - #endif - AesGcmPrecompute_table2K(AES_GCM_MTBL(pState), AESGCM_HKEY(pState)); - #endif /* #if(_IPP32E>=_IPP32E_K0) */ + if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL)) { + /* pre-compute reflect(hkey) and hKey<<1, (hKey<<1)^2 and (hKey<<1)^4 powers of hKey */ + AesGcmPrecompute_avx(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); + } + else + #endif /* #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) */ + AesGcmPrecompute_table2K(AES_GCM_MTBL(pState), AESGCM_HKEY(pState)); + +#endif /* #if(_IPP32E>=_IPP32E_K0) */ + return ippStsNoErr; } diff --git a/sources/ippcp/pcpaes_gcmmul_vaes512.c b/sources/ippcp/pcpaes_gcmmul_vaes512.c index 4953eccc..76b30a74 100644 --- a/sources/ippcp/pcpaes_gcmmul_vaes512.c +++ b/sources/ippcp/pcpaes_gcmmul_vaes512.c @@ -45,7 +45,7 @@ polynomial reduction. 2 polynomials can be processed at one call. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash2(const __m256i* const src1, +__IPPCP_INLINE void AesGcmGhash2(const __m256i* const src1, const __m256i* const src2, __m256i * const result) { @@ -101,7 +101,7 @@ __INLINE void AesGcmGhash2(const __m256i* const src1, polynomial reduction. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash(const __m128i* const a, +__IPPCP_INLINE void AesGcmGhash(const __m128i* const a, const __m128i* const b, __m128i * const result) { @@ -119,7 +119,7 @@ __INLINE void AesGcmGhash(const __m128i* const a, polynomial reduction. 
4 polynomials can be processed at one call. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash4(const __m512i* const src1, +__IPPCP_INLINE void AesGcmGhash4(const __m512i* const src1, const __m512i* const src2, __m512i * const result) { diff --git a/sources/ippcp/pcpaes_gcmreinit.c b/sources/ippcp/pcpaes_gcmreinit.c index f171a996..069b698b 100644 --- a/sources/ippcp/pcpaes_gcmreinit.c +++ b/sources/ippcp/pcpaes_gcmreinit.c @@ -28,7 +28,7 @@ #include "owndefs.h" #include "owncp.h" #include "pcpaesm.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #include "pcptool.h" #if (_IPP32E >= _IPP32E_K0) diff --git a/sources/ippcp/pcpaes_internal_func.c b/sources/ippcp/pcpaes_internal_func.c index 2624bd9c..b9b47d7b 100644 --- a/sources/ippcp/pcpaes_internal_func.c +++ b/sources/ippcp/pcpaes_internal_func.c @@ -18,25 +18,15 @@ // // Purpose: // Cryptography Primitive. -// * Initialization functions for internal methods and pointers inside -// AES cipher context and AES-GCM context; -// * AES-GCM encryption kernels with the conditional noise injections mechanism; +// * Initialization functions for internal methods and pointers inside AES cipher context // */ #include "pcpaes_internal_func.h" -#include "aes_gcm_avx512.h" -#include "owndefs.h" #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#if (_IPP32E >= _IPP32E_K0) -#include "pcpaesauthgcm_avx512.h" -#else -#include "pcpaesauthgcm.h" -#endif /* #if(_IPP32E>=_IPP32E_K0) */ - /* * This function set up pointers to encryption and decryption key schedules, * dispatches to the right internal methods and sets pointers to them inside the AES state. @@ -75,226 +65,3 @@ IPP_OWN_DEFN(void, cpAes_setup_ptrs_and_methods, (IppsAESSpec * pCtx)) } #endif } - -/* - * This function dispatches to the right internal methods and sets pointers to them inside the AES-GCM state. 
- */ -IPP_OWN_DEFN(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pState, Ipp64u keyByteLen)) -{ -#if (_IPP32E >= _IPP32E_K0) - if (IsFeatureEnabled(ippCPUID_AVX512VAES) && IsFeatureEnabled(ippCPUID_AVX512VCLMUL)) { - switch (keyByteLen) { - case 16: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_vaes_avx512; - break; - case 24: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_vaes_avx512; - break; - case 32: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_vaes_avx512; - break; - } - - AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_vaes512; - AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_vaes512; - AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_vaes512; - AES_GCM_GMUL(pState) = aes_gcm_gmult_vaes512; - } else { - switch (keyByteLen) { - case 16: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_avx512; - break; - case 24: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_avx512; - break; - case 32: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_avx512; - break; - } - - AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_avx512; - AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_avx512; - AES_GCM_AAD_UPDATE(pState) = 
aes_gcm_aad_hash_update_avx512; - AES_GCM_GMUL(pState) = aes_gcm_gmult_avx512; - } -#else - IPP_UNREFERENCED_PARAMETER(keyByteLen); - - /* set up: - // - ghash function - // - authentication function - */ - AESGCM_HASH(pState) = AesGcmMulGcm_table2K_ct; // AesGcmMulGcm_table2K; - AESGCM_AUTH(pState) = AesGcmAuth_table2K_ct; // AesGcmAuth_table2K; - AESGCM_ENC(pState) = wrpAesGcmEnc_table2K; - AESGCM_DEC(pState) = wrpAesGcmDec_table2K; - -#if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) -// the dead code that currently is unused -//#if (_IPP32E >= _IPP32E_K0) -// if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { -// AESGCM_HASH(pState) = AesGcmMulGcm_vaes; -// AESGCM_AUTH(pState) = AesGcmAuth_vaes; -// AESGCM_ENC(pState) = AesGcmEnc_vaes; -// AESGCM_DEC(pState) = AesGcmDec_vaes; -// } else -//#endif /* #if(_IPP32E>=_IPP32E_K0) */ - if (IsFeatureEnabled(ippCPUID_AES | ippCPUID_CLMUL)) { - AESGCM_HASH(pState) = AesGcmMulGcm_avx; - AESGCM_AUTH(pState) = AesGcmAuth_avx; - AESGCM_ENC(pState) = wrpAesGcmEnc_avx; - AESGCM_DEC(pState) = wrpAesGcmDec_avx; - } -#if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) - if (IsFeatureEnabled(ippCPUID_AVX2VAES | ippCPUID_AVX2VCLMUL)) { - AESGCM_HASH(pState) = AesGcmMulGcm_avx; - AESGCM_AUTH(pState) = AesGcmAuth_avx; - AESGCM_ENC(pState) = AesGcmEnc_vaes_avx2; - AESGCM_DEC(pState) = AesGcmDec_vaes_avx2; - } -#endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ -#endif /* #if(_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) */ - -#endif /* #if(_IPP32E>=_IPP32E_K0) */ -} - - - -/*! - * This function computes AES-GCM encryption kernel with the the conditional noise injections mechanism (Mistletoe3 - * attack mitigation). - * - * Parameters: - * \param[in] pSrc Pointer to plaintext. - * \param[in] pDst Pointer to ciphertext. - * \param[in] ptxt_len Length of the plaintext in bytes. - * \param[in] pState Pointer to the AES-GCM context. 
- */ -IPP_OWN_DEFN(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, - IppsAES_GCMState* pState)) -{ -/* Identify the encryption method. It's different for different platforms */ -#if(_IPP32E>=_IPP32E_K0) - EncryptUpdate_ encFunc = AES_GCM_ENCRYPT_UPDATE(pState); -#else - Encrypt_ encFunc = AESGCM_ENC(pState); -#endif - -#if (_AES_PROB_NOISE == _FEATURE_ON_) - /* Mistletoe3 mitigation */ - cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); - if (AES_NOISE_LEVEL(params) > 0) { - /* Number of bytes allowed for operation without adding noise */ - int chunk_size; - /* Number of bytes remaining for operation */ - int remaining_size = ptxt_len; - - while (remaining_size > 0) { - /* How many bytes to encrypt in this operation */ - chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? - MISTLETOE3_MAX_CHUNK_SIZE : - remaining_size; - - #if(_IPP32E>=_IPP32E_K0) - encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)chunk_size); - #else - encFunc(pDst, pSrc, chunk_size, pState); - #endif - - cpAESRandomNoise(NULL, - MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), - MISTLETOE3_NOISE_RATE, - &AES_NOISE_RAND(params)); - - pSrc += chunk_size; - pDst += chunk_size; - remaining_size -= chunk_size; - } - } else -#endif - { /* Process without noise injection */ - #if(_IPP32E>=_IPP32E_K0) - encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)ptxt_len); - #else - encFunc(pDst, pSrc, ptxt_len, pState); - #endif - } -} - - -/*! - * This function computes AES-GCM decryption kernel with the the conditional noise injections mechanism (Mistletoe3 - * attack mitigation). - * - * Parameters: - * \param[in] pSrc Pointer to ciphertext. - * \param[in] pDst Pointer to deciphered text. - * \param[in] ctxt_len Length of the ciphertext in bytes. - * \param[in] pState Pointer to the AES-GCM context. 
- */ -IPP_OWN_DEFN(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ctxt_len, - IppsAES_GCMState* pState)) -{ -/* Identify the decryption method. It's different for different platforms */ -#if(_IPP32E>=_IPP32E_K0) - DecryptUpdate_ decFunc = AES_GCM_DECRYPT_UPDATE(pState); -#else - Decrypt_ decFunc = AESGCM_DEC(pState); -#endif - -#if (_AES_PROB_NOISE == _FEATURE_ON_) - /* Mistletoe3 mitigation */ - cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); - if (AES_NOISE_LEVEL(params) > 0) { - /* Number of bytes allowed for operation without adding noise */ - int chunk_size; - /* Number of bytes remaining for operation */ - int remaining_size = ctxt_len; - - while (remaining_size > 0) { - /* How many bytes to decrypt in this operation */ - chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? - MISTLETOE3_MAX_CHUNK_SIZE : - remaining_size; - - #if(_IPP32E>=_IPP32E_K0) - decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)chunk_size); - #else - decFunc(pDst, pSrc, chunk_size, pState); - #endif - - cpAESRandomNoise(NULL, - MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), - MISTLETOE3_NOISE_RATE, - &AES_NOISE_RAND(params)); - - pSrc += chunk_size; - pDst += chunk_size; - remaining_size -= chunk_size; - } - } else -#endif - { /* Process without noise injection */ - #if(_IPP32E>=_IPP32E_K0) - decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)ctxt_len); - #else - decFunc(pDst, pSrc, ctxt_len, pState); - #endif - } -} diff --git a/sources/ippcp/pcpaes_internal_func.h b/sources/ippcp/pcpaes_internal_func.h index d0082401..fe10de80 100644 --- a/sources/ippcp/pcpaes_internal_func.h +++ b/sources/ippcp/pcpaes_internal_func.h @@ -19,8 +19,6 @@ // Purpose: // Cryptography Primitive. 
// Initialization functions for internal methods and pointers inside AES cipher context -// and AES-GCM context; -// AES-GCM encryption kernels with the conditional noise injections mechanism; // */ @@ -32,13 +30,4 @@ #define cpAes_setup_ptrs_and_methods OWNAPI(cpAes_setup_ptrs_and_methods) IPP_OWN_DECL(void, cpAes_setup_ptrs_and_methods, (IppsAESSpec * pCtx)) -#define cpAesGCM_setup_ptrs_and_methods OWNAPI(cpAesGCM_setup_ptrs_and_methods) -IPP_OWN_DECL(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pCtx, Ipp64u keyByteLen)) - -#define condNoisedGCMEncryption OWNAPI(condNoisedGCMEncryption) -IPP_OWN_DECL(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) - -#define condNoisedGCMDecryption OWNAPI(condNoisedGCMDecryption) -IPP_OWN_DECL(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) - #endif /* _PCP_AES_INTERNAL_FUNC_H */ diff --git a/sources/ippcp/pcpaes_sivstuff.h b/sources/ippcp/pcpaes_sivstuff.h index 76108c4a..83b56927 100644 --- a/sources/ippcp/pcpaes_sivstuff.h +++ b/sources/ippcp/pcpaes_sivstuff.h @@ -14,12 +14,12 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// AES-SIV Functions (RFC 5297) -// +// // Contents: // Stuff() // @@ -35,12 +35,12 @@ #define _PCP_AES_SIV_STUFF_H_ //////////////////////////////////////////////////////////// -__INLINE void cpAES_CMAC(Ipp8u mac[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) +__IPPCP_INLINE void cpAES_CMAC(Ipp8u mac[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) { ippsAES_CMACUpdate(pSrc, len, pCtx); ippsAES_CMACFinal(mac, MBS_RIJ128, pCtx); } -__INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int keyLen, IppsAES_CMACState* pCtx, int ctxSize) +__IPPCP_INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int keyLen, IppsAES_CMACState* pCtx, int ctxSize) { IppStatus sts = ippsAES_CMACInit(pKey, keyLen, pCtx, ctxSize); if(ippStsNoErr==sts) { @@ -49,7 +49,7 @@ __INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int ke } return sts; } -__INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) +__IPPCP_INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) { /* double inp */ Ipp32u carry = 0; @@ -63,7 +63,7 @@ __INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) out[MBS_RIJ128-1] ^= ((Ipp8u)(0-carry) & 0x87); return out; } -__INLINE void cpAES_S2V_update(Ipp8u v[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) +__IPPCP_INLINE void cpAES_S2V_update(Ipp8u v[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) { Ipp8u t[MBS_RIJ128]; cpAES_CMAC(t, pSrc, len, pCtx); diff --git a/sources/ippcp/pcpaes_xts_vaes512.c b/sources/ippcp/pcpaes_xts_vaes512.c index 201f1527..5f9009d9 100644 --- a/sources/ippcp/pcpaes_xts_vaes512.c +++ b/sources/ippcp/pcpaes_xts_vaes512.c @@ -42,7 +42,7 @@ #define M512(mem) (*((__m512i*)(mem))) /* Generate next 4 tweaks with 2^8 multiplier */ -__INLINE __m512i nextTweaks_x8(__m512i tweak128x4) +__IPPCP_INLINE __m512i nextTweaks_x8(__m512i tweak128x4) { const __m512i poly = 
_mm512_set_epi64(0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87); @@ -55,7 +55,7 @@ __INLINE __m512i nextTweaks_x8(__m512i tweak128x4) } /* Generate next 4 tweaks with 2^32 multiplier */ -__INLINE __m512i nextTweaks_x32(__m512i tweak128x4) +__IPPCP_INLINE __m512i nextTweaks_x32(__m512i tweak128x4) { const __m512i poly = _mm512_set_epi64(0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87); diff --git a/sources/ippcp/pcpaesauthgcm.h b/sources/ippcp/pcpaesauthgcm.h index c6c5eadc..a7aeca56 100644 --- a/sources/ippcp/pcpaesauthgcm.h +++ b/sources/ippcp/pcpaesauthgcm.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Message Authentication Algorithm // Internal Definitions and Internal Functions Prototypes -// -// +// +// */ #if !defined(_CP_AESAUTH_GCM_H) @@ -54,7 +54,7 @@ typedef enum { } GcmState; struct _cpAES_GCM { - + Ipp32u idCtx; /* AES-GCM id */ GcmState state; /* GCM state: Init, IV|AAD|TXT processing */ Ipp64u ivLen; /* IV length (bytes) */ @@ -77,13 +77,19 @@ struct _cpAES_GCM { IppsAESSpec cipher; #if (_AES_PROB_NOISE == _FEATURE_ON_) - __ALIGN16 + __ALIGN16 cpAESNoiseParams noiseParams; #endif __ALIGN16 /* aligned pre-computed data: */ Ipp8u multiplier[BLOCK_SIZE]; /* - (default) hKey */ /* - (aes_ni) hKey*t, (hKey*t)^2, (hKey*t)^4 */ + /* - (avx2_vaes) 16 vectors by 128-bit values + hKey<<1, hKey^2<<1, hKey^3<<1, hKey^4<<1, + hKey^5<<1, hKey^6<<1, hKey^7<<1, hKey^8<<1, + hKey^9<<1, hKey^10<<1, hKey^11<<1, hKey^12<<1, + hKey^13<<1, hKey^14<<1, hKey^15<<1, hKey^16<<1, + */ /* - (vaes_ni) 8 reverted ordered vectors by 4 128-bit values. 
hKeys derivations in the multiplier[] array in order of appearance (zero-index starts from the left): @@ -101,9 +107,10 @@ struct _cpAES_GCM { /* alignment */ #define AESGCM_ALIGNMENT (16) -#define PRECOMP_DATA_SIZE_AES_NI_AESGCM (BLOCK_SIZE*4) -#define PRECOMP_DATA_SIZE_VAES_NI_AESGCM (BLOCK_SIZE*16*2) -#define PRECOMP_DATA_SIZE_FAST2K (BLOCK_SIZE*128) +#define PRECOMP_DATA_SIZE_AES_NI_AESGCM (BLOCK_SIZE*4) +#define PRECOMP_DATA_SIZE_AVX2_VAES_AESGCM (BLOCK_SIZE*16) +#define PRECOMP_DATA_SIZE_VAES_NI_AESGCM (BLOCK_SIZE*16*2) +#define PRECOMP_DATA_SIZE_FAST2K (BLOCK_SIZE*128) /* // Useful macros @@ -139,13 +146,13 @@ struct _cpAES_GCM { #define AESGCM_VALID_ID(context) ((((context)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((context))) == (Ipp32u)idCtxAESGCM) #if 0 -__INLINE void IncrementCounter32(Ipp8u* pCtr) +__IPPCP_INLINE void IncrementCounter32(Ipp8u* pCtr) { int i; for(i=BLOCK_SIZE-1; i>=CTR_POS && 0==(Ipp8u)(++pCtr[i]); i--) ; } #endif -__INLINE void IncrementCounter32(Ipp8u* pCtr) +__IPPCP_INLINE void IncrementCounter32(Ipp8u* pCtr) { Ipp32u* pCtr32 = (Ipp32u*)pCtr; Ipp32u ctrVal = pCtr32[3]; @@ -156,6 +163,8 @@ __INLINE void IncrementCounter32(Ipp8u* pCtr) } #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) +#define AesGcmPrecompute_avx2_vaes OWNAPI(AesGcmPrecompute_avx2_vaes) + IPP_OWN_DECL (void, AesGcmPrecompute_avx2_vaes, (Ipp8u* pPrecomputeData, const Ipp8u* pHKey)) #define AesGcmPrecompute_avx OWNAPI(AesGcmPrecompute_avx) IPP_OWN_DECL (void, AesGcmPrecompute_avx, (Ipp8u* pPrecomputeData, const Ipp8u* pHKey)) #define AesGcmMulGcm_avx OWNAPI(AesGcmMulGcm_avx) @@ -217,7 +226,9 @@ static int cpSizeofCtx_AESGCM(void) int precomp_size; #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) - if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL) || IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) + if (IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) + precomp_size = PRECOMP_DATA_SIZE_AVX2_VAES_AESGCM; + else if (IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL)) precomp_size 
= PRECOMP_DATA_SIZE_AES_NI_AESGCM; else #endif diff --git a/sources/ippcp/pcpaesauthgcm_avx512.h b/sources/ippcp/pcpaesauthgcm_avx512.h index 9a8cd569..d774ce97 100644 --- a/sources/ippcp/pcpaesauthgcm_avx512.h +++ b/sources/ippcp/pcpaesauthgcm_avx512.h @@ -18,7 +18,7 @@ // // Purpose: // Cryptography Primitive. -// AES GCM otimized for AVX512 and AVX512-VAES features +// AES GCM optimized for AVX512 and AVX512-VAES features // Internal Definitions // // @@ -98,10 +98,10 @@ struct _cpAES_GCM { EncryptUpdate_ encryptUpdateFunc; // Encryption-authentication DecryptUpdate_ decryptUpdateFunc; // Decryption-verification GetTag_ getTagFunc; // Get tag - + #if (_AES_PROB_NOISE == _FEATURE_ON_) __ALIGN16 - cpAESNoiseParams noiseParams; + cpAESNoiseParams noiseParams; #endif }; diff --git a/sources/ippcp/pcpaesgcmtbl2k_mulpx.c b/sources/ippcp/pcpaesgcmtbl2k_mulpx.c index fad70fd4..4e79658c 100644 --- a/sources/ippcp/pcpaesgcmtbl2k_mulpx.c +++ b/sources/ippcp/pcpaesgcmtbl2k_mulpx.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Encrypt/Decrypt byte data stream according to Rijndael128 (GCM mode) -// +// // "fast" stuff -// +// // Contents: // AesGcmMulGcm_table2K() -// +// */ @@ -55,7 +55,7 @@ typedef struct{ // // Ghash = Ghash * HKey mod G() */ -__INLINE Ipp16u getAesGcmConst_table_ct(int idx) +__IPPCP_INLINE Ipp16u getAesGcmConst_table_ct(int idx) { #define TBL_SLOTS_REP_READ (Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(AesGcmConst_table[0])) const BNU_CHUNK_T* TblEntry = (BNU_CHUNK_T*)AesGcmConst_table; @@ -137,13 +137,13 @@ void AesGcmMulGcm_table2K(Ipp8u* pGhash, const Ipp8u* pPrecomputeData, const voi // CTE version of AesGcmMulGcm_table2K() */ #if (_IPP_ARCH ==_IPP_ARCH_EM64T) -__INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp64u src2mask) +__IPPCP_INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp64u src2mask) { ((Ipp64u*)pDst)[0] = ((Ipp64u*)pSrc1)[0] ^ (((Ipp64u*)pSrc2)[0] & src2mask); ((Ipp64u*)pDst)[1] = ((Ipp64u*)pSrc1)[1] ^ (((Ipp64u*)pSrc2)[1] & src2mask); } #else /* IPP_ARCH == IPP_ARCH_IA32 */ -__INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp32u src2mask) +__IPPCP_INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp32u src2mask) { ((Ipp32u*)pDst)[0] = ((Ipp32u*)pSrc1)[0] ^ (((Ipp32u*)pSrc2)[0] & src2mask); ((Ipp32u*)pDst)[1] = ((Ipp32u*)pSrc1)[1] ^ (((Ipp32u*)pSrc2)[1] & src2mask); @@ -238,7 +238,7 @@ IPP_OWN_DEFN (void, AesGcmMulGcm_table2K_ct, (Ipp8u* pGhash, const Ipp8u* pPreco #if ((_IPP>=_IPP_V8) || (_IPP32E>=_IPP32E_N8)) -__INLINE Ipp16u getAesGcmConst_table_ct(int idx) +__IPPCP_INLINE Ipp16u getAesGcmConst_table_ct(int idx) { /* init current indexes */ __ALIGN16 Ipp16u idx_start[] = { 0,1,2,3,4,5,6,7 }; diff --git a/sources/ippcp/pcpaesm.h b/sources/ippcp/pcpaesm.h index ebf04ef6..9b220b3c 100644 --- a/sources/ippcp/pcpaesm.h +++ b/sources/ippcp/pcpaesm.h @@ -14,14 +14,14 @@ * limitations under the License. 
*************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Internal Definitions and // Internal AES Function Prototypes -// -// +// +// */ #if !defined(_PCP_AES_H) @@ -65,13 +65,13 @@ static int rij128nKeys[3] = {44, 52, 60 }; // helper for nRounds[] and estnKeys[] access // note: x is length in 32-bits words */ -__INLINE int rij_index(int x) +__IPPCP_INLINE int rij_index(int x) { return (x-NB(128))>>1; } /* size of AES context */ -__INLINE int cpSizeofCtx_AES(void) +__IPPCP_INLINE int cpSizeofCtx_AES(void) { return sizeof(IppsAESSpec); } diff --git a/sources/ippcp/pcpaesmxts.h b/sources/ippcp/pcpaesmxts.h index ea9d7682..a8eb0737 100644 --- a/sources/ippcp/pcpaesmxts.h +++ b/sources/ippcp/pcpaesmxts.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // AES-XTS Internal Definitions -// -// +// +// */ #if !defined(_PCP_AES_XTS_H) @@ -53,7 +53,7 @@ struct _cpAES_XTS #define VALID_AES_XTS_ID(ctx) ((((ctx)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((ctx))) == (Ipp32u)idCtxAESXTS) /* size of AES-XTS context */ -__INLINE int cpSizeof_AES_XTS_Ctx(void) +__IPPCP_INLINE int cpSizeof_AES_XTS_Ctx(void) { return sizeof(IppsAES_XTSSpec); } diff --git a/sources/ippcp/pcpaesmxtsstuff.h b/sources/ippcp/pcpaesmxtsstuff.h index 41ab30de..185d54c4 100644 --- a/sources/ippcp/pcpaesmxtsstuff.h +++ b/sources/ippcp/pcpaesmxtsstuff.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// AES-XTS Internal Functions -// -// +// +// */ #if !defined(_PCP_AES_XTS_STUFF_H) @@ -39,7 +39,7 @@ #define GF_MASK (0x8000000000000000) #define GF_POLY (0x0000000000000087) -__INLINE void gf_mul_by_primitive(void* x) +__IPPCP_INLINE void gf_mul_by_primitive(void* x) { Ipp64u* x64 = (Ipp64u*)x; Ipp64u xorL = ((Ipp64s)x64[1] >> 63) & GF_POLY; diff --git a/sources/ippcp/pcpaesnoise.h b/sources/ippcp/pcpaesnoise.h index 6fc0c9bc..986c5dc0 100644 --- a/sources/ippcp/pcpaesnoise.h +++ b/sources/ippcp/pcpaesnoise.h @@ -17,9 +17,9 @@ #if !defined(_PCP_AES_NOISE_H) #define _PCP_AES_NOISE_H -/* +/* * The parameters below are empirical and chosen in advance to guarantee - * the high level of security protection against Mistletoe3 attack. + * the high level of security protection against Mistletoe3 attack. */ #define MISTLETOE3_MAX_CHUNK_SIZE (16000) /* maximum chunks size allowed to be processed without noise injection (in bytes) \ 16000 bytes = 16*1000 bytes = 1000 AES blocks */ @@ -47,7 +47,7 @@ typedef struct _cpAESNoiseParams { #define AES_NOISE_LEVEL(ctx) ((ctx)->noiseLevel) /* size of _cpAESNoiseParams structure */ -__INLINE int cpSizeofNoise_Params(void) +__IPPCP_INLINE int cpSizeofNoise_Params(void) { return sizeof(cpAESNoiseParams); } diff --git a/sources/ippcp/pcpbn.h b/sources/ippcp/pcpbn.h index fdd53d5d..c2e5d6f4 100644 --- a/sources/ippcp/pcpbn.h +++ b/sources/ippcp/pcpbn.h @@ -14,12 +14,12 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippcp) -// -// -// +// +// +// */ #if !defined(_CP_BN_H) @@ -70,7 +70,7 @@ struct _cpBigNum IPP_OWN_DECL (void, cpUnpackBigNumCtx, (const Ipp8u* pBuffer, IppsBigNumState* pBN)) /* copy BN */ -__INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState* pSrc) +__IPPCP_INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState* pSrc) { BN_SIGN(pDst) = BN_SIGN(pSrc); BN_SIZE(pDst) = BN_SIZE(pSrc); @@ -78,7 +78,7 @@ __INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState return pDst; } /* set BN to zero */ -__INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -86,7 +86,7 @@ __INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) return pBN; } /* fixup BN */ -__INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) { cpSize len = BN_SIZE(pBN); FIX_BNU(BN_NUMBER(pBN), len); @@ -94,7 +94,7 @@ __INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) return pBN; } /* set BN to chunk */ -__INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) +__IPPCP_INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -103,7 +103,7 @@ __INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) return pBN; } /* set BN to 2^m */ -__INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) +__IPPCP_INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) { cpSize size = BITS_BNU_CHUNK(power+1); if(BN_ROOM(pBN) >= size) { @@ -117,14 +117,14 @@ __INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) } /* bitsize of BN */ -__INLINE int 
cpBN_bitsize(const IppsBigNumState* pA) +__IPPCP_INLINE int cpBN_bitsize(const IppsBigNumState* pA) { int bitsize = BITSIZE_BNU(BN_NUMBER(pA), BN_SIZE(pA)); return bitsize; } /* returns -1/0/+1 depemding on A~B comparison */ -__INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) +__IPPCP_INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) { IppsBigNumSGN signA = BN_SIGN(pA); IppsBigNumSGN signB = BN_SIGN(pB); @@ -137,7 +137,7 @@ __INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) } /* returns -1/0/+1 depemding on A comparison 00 */ -__INLINE int cpBN_tst(const IppsBigNumState* pA) +__IPPCP_INLINE int cpBN_tst(const IppsBigNumState* pA) { if(1==BN_SIZE(pA) && 0==BN_NUMBER(pA)[0]) return 0; @@ -146,17 +146,17 @@ __INLINE int cpBN_tst(const IppsBigNumState* pA) } -// some addtition functions -__INLINE int IsZero_BN(const IppsBigNumState* pA) +// some addition functions +__IPPCP_INLINE int IsZero_BN(const IppsBigNumState* pA) { return ( BN_SIZE(pA)==1 ) && ( BN_NUMBER(pA)[0]==0 ); } -__INLINE int IsOdd_BN(const IppsBigNumState* pA) +__IPPCP_INLINE int IsOdd_BN(const IppsBigNumState* pA) { return BN_NUMBER(pA)[0] & 1; } -__INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) +__IPPCP_INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -164,14 +164,14 @@ __INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) BN_NUMBER(pBN)[0] = w; return pBN; } -__INLINE IppsBigNumState* BN_Set(const BNU_CHUNK_T* pData, cpSize len, IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* BN_Set(const BNU_CHUNK_T* pData, cpSize len, IppsBigNumState* pBN) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = len; ZEXPAND_COPY_BNU(BN_NUMBER(pBN), BN_ROOM(pBN), pData, len); return pBN; } -__INLINE IppsBigNumState* BN_Make(BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer, cpSize len, IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* 
BN_Make(BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer, cpSize len, IppsBigNumState* pBN) { BN_SET_ID(pBN); BN_SIGN(pBN) = ippBigNumPOS; diff --git a/sources/ippcp/pcpbninit.c b/sources/ippcp/pcpbninit.c index 2b249a66..b4c86766 100644 --- a/sources/ippcp/pcpbninit.c +++ b/sources/ippcp/pcpbninit.c @@ -57,7 +57,7 @@ IPPFUN(IppStatus, ippsBigNumInit, (int length, IppsBigNumState* pBN)) cpSize len = INTERNAL_BNU_LENGTH(length); BN_SIGN(pBN) = ippBigNumPOS; - BN_SIZE(pBN) = 1; /* initial valie is zero */ + BN_SIZE(pBN) = 1; /* initial value is zero */ BN_ROOM(pBN) = len; /* close to what has been passed by user */ /* reserve one BNU_CHUNK_T more for cpDiv_BNU, diff --git a/sources/ippcp/pcpbnu32_arith_sub.c b/sources/ippcp/pcpbnu32_arith_sub.c index 6ca517b9..dbdaddb5 100644 --- a/sources/ippcp/pcpbnu32_arith_sub.c +++ b/sources/ippcp/pcpbnu32_arith_sub.c @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* +/* // Purpose: // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Internal BNU32 arithmetic. -// +// // Contents: // cpSub_BNU32() -// +// */ #include "owncp.h" @@ -33,7 +33,7 @@ /*F* // Name: cpSub_BNU32 // -// Purpose: substract BNU32. +// Purpose: subtract BNU32. 
// // Returns: // borrow diff --git a/sources/ippcp/pcpbnu32misc.h b/sources/ippcp/pcpbnu32misc.h index 308c41c7..c07d26d2 100644 --- a/sources/ippcp/pcpbnu32misc.h +++ b/sources/ippcp/pcpbnu32misc.h @@ -34,7 +34,7 @@ #define cpNLZ_BNU32 OWNAPI(cpNLZ_BNU32) IPP_OWN_DECL (cpSize, cpNLZ_BNU32, (Ipp32u x)) #else - __INLINE cpSize cpNLZ_BNU32(Ipp32u x) + __IPPCP_INLINE cpSize cpNLZ_BNU32(Ipp32u x) { return (cpSize)_lzcnt_u32(x); } @@ -52,7 +52,7 @@ // nsA size of BNU // */ -__INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) +__IPPCP_INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) { Ipp32u zscan = (Ipp32u)(-1); int outLen = nsA; @@ -67,7 +67,7 @@ __INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) /* most significant BNU bit */ #if 0 -__INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) +__IPPCP_INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) { FIX_BNU(pA, nsA); return nsA*BITSIZE(Ipp32u) - cpNLZ_BNU32(pA[nsA-1]) -1; @@ -75,7 +75,7 @@ __INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) #endif #if 0 -__INLINE int cpCmp_BNU32(const Ipp32u* pA, cpSize nsA, const Ipp32u* pB, cpSize nsB) +__IPPCP_INLINE int cpCmp_BNU32(const Ipp32u* pA, cpSize nsA, const Ipp32u* pB, cpSize nsB) { if(nsA!=nsB) return nsA>nsB? 1 : -1; diff --git a/sources/ippcp/pcpbnuarith.h b/sources/ippcp/pcpbnuarith.h index dd36a0f5..1cf5a820 100644 --- a/sources/ippcp/pcpbnuarith.h +++ b/sources/ippcp/pcpbnuarith.h @@ -14,12 +14,12 @@ * limitations under the License. *************************************************************************/ -/* +/* // Purpose: // Intel(R) Integrated Performance Primitives. 
// Internal Unsigned internal arithmetic -// -// +// +// */ #if !defined(_CP_BNU_ARITH_H) @@ -60,7 +60,7 @@ // *F*/ -__INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) { @@ -94,7 +94,7 @@ __INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, // *F*/ -__INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA) +__IPPCP_INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA) { #if(_ADCOX_NI_ENABLING_==_FEATURE_ON_) return cpSqrAdx_BNU_school(pR, pA,nsA); @@ -114,7 +114,7 @@ __INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, /* // multiplication/squaring wrappers */ -__INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB, BNU_CHUNK_T* pBuffer) @@ -122,7 +122,7 @@ __INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, IPP_UNREFERENCED_PARAMETER(pBuffer); return cpMul_BNU_school(pR, pA,nsA, pB,nsB); } -__INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, +__IPPCP_INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA, BNU_CHUNK_T* pBuffer) { @@ -148,7 +148,7 @@ __INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, // *F*/ -__INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize nsA, BNU_CHUNK_T* pB, cpSize nsB) +__IPPCP_INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize nsA, BNU_CHUNK_T* pB, cpSize nsB) { int nsR = cpDiv_BNU32((Ipp32u*)pQ, pnsQ, (Ipp32u*)pA, nsA*(Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u)), @@ -180,7 +180,7 @@ __INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize // *F*/ -__INLINE cpSize cpMod_BNU(BNU_CHUNK_T* pX, cpSize nsX, BNU_CHUNK_T* pModulus, cpSize nsM) +__IPPCP_INLINE cpSize cpMod_BNU(BNU_CHUNK_T* pX, cpSize nsX, 
BNU_CHUNK_T* pModulus, cpSize nsM) { return cpDiv_BNU(NULL,NULL, pX,nsX, pModulus, nsM); } diff --git a/sources/ippcp/pcpbnumisc.h b/sources/ippcp/pcpbnumisc.h index 7656ddaf..33b04a82 100644 --- a/sources/ippcp/pcpbnumisc.h +++ b/sources/ippcp/pcpbnumisc.h @@ -67,10 +67,10 @@ /* copy and set */ -__INLINE void cpCpy_BNU(BNU_CHUNK_T* pDst, const BNU_CHUNK_T* pSrc, cpSize ns) +__IPPCP_INLINE void cpCpy_BNU(BNU_CHUNK_T* pDst, const BNU_CHUNK_T* pSrc, cpSize ns) { COPY_BNU(pDst, pSrc, ns); } -__INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) +__IPPCP_INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) { ZEXPAND_BNU(pDst, 0, ns); pDst[0] = val; @@ -90,7 +90,7 @@ __INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) // nsA Size of pA // */ -__INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) +__IPPCP_INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) { BNU_CHUNK_T zscan = (BNU_CHUNK_T)(-1); int outLen = nsA; @@ -120,7 +120,7 @@ __INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) // */ #if 0 -__INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) +__IPPCP_INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) { if(nsA!=nsB) return nsA>nsB? 
1 : -1; @@ -133,7 +133,7 @@ __INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, } #endif -__INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) +__IPPCP_INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) { const Ipp32u* a32 = (const Ipp32u*)a; const Ipp32u* b32 = (const Ipp32u*)b; @@ -153,7 +153,7 @@ __INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) return (int)(resb|resd); } -__INLINE int cpCmp_BNU(const BNU_CHUNK_T* a, int aLen, const BNU_CHUNK_T* b, int bLen) +__IPPCP_INLINE int cpCmp_BNU(const BNU_CHUNK_T* a, int aLen, const BNU_CHUNK_T* b, int bLen) { BNU_CHUNK_T aLen_eq_bLen = cpIsZero_ct((BNU_CHUNK_T)(aLen-bLen)); // FFFF/0000 if (aLen=bLen) / (aLen!=bLen) BNU_CHUNK_T aLen_gt_bLen = cpIsMsb_ct((BNU_CHUNK_T)(bLen-aLen)) & 1; // 1/0 if (aLen>bLen) / (aLen0, if A > 0 // <0, looks like impossible (or error) case */ -__INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) +__IPPCP_INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) { for(; (nsA>0) && (0==pA[nsA-1]); nsA--) ; return nsA; @@ -208,7 +208,7 @@ __INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) #define cpNLZ_BNU OWNAPI(cpNLZ_BNU) IPP_OWN_DECL (cpSize, cpNLZ_BNU, (BNU_CHUNK_T x)) #else - __INLINE cpSize cpNLZ_BNU(BNU_CHUNK_T x) + __IPPCP_INLINE cpSize cpNLZ_BNU(BNU_CHUNK_T x) { #if (BNU_CHUNK_BITS == BNU_CHUNK_64BIT) return (cpSize)_lzcnt_u64(x); diff --git a/sources/ippcp/pcpdescipherm.c b/sources/ippcp/pcpdescipherm.c index 1bf9de80..a07cfcaa 100644 --- a/sources/ippcp/pcpdescipherm.c +++ b/sources/ippcp/pcpdescipherm.c @@ -14,19 +14,19 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// DES Cipher function (MemJam mitigation included) -// +// // Contents: // initial permutation: ip() // final permutation: fp() // round function: rndm() -// DES block encypt/decrypt: Chipher_DES() -// -// +// DES block encrypt/decrypt: Chipher_DES() +// +// */ diff --git a/sources/ippcp/pcpdlpgeneratedh.c b/sources/ippcp/pcpdlpgeneratedh.c index 9629a71d..88f5aac2 100644 --- a/sources/ippcp/pcpdlpgeneratedh.c +++ b/sources/ippcp/pcpdlpgeneratedh.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // DL over Prime Finite Field (generate domain parameters) -// +// // Contents: // ippsDLPGenerateDH() // ippsDLPGenerateDSA() -// -// +// +// */ #include "owndefs.h" @@ -103,7 +103,7 @@ IPPFUN(IppStatus, ippsDLPGenerateDH,(const IppsBigNumState* pSeedIn, IppsBigNumState* pSeed1 = cpBigNumListGet(&pList); IppsBigNumState* pSeed2 = cpBigNumListGet(&pList); - /* interally generates SeedIn value by default */ + /* internally generates SeedIn value by default */ IppBool seed_is_random = ippTrue; int seedBitSize = DLP_BITSIZER(pDL); diff --git a/sources/ippcp/pcpdlpgeneratedsa.c b/sources/ippcp/pcpdlpgeneratedsa.c index be1b559d..9bfb7855 100644 --- a/sources/ippcp/pcpdlpgeneratedsa.c +++ b/sources/ippcp/pcpdlpgeneratedsa.c @@ -109,7 +109,7 @@ IPPFUN(IppStatus, ippsDLPGenerateDSA,(const IppsBigNumState* pSeedIn, IppsBigNumState* pSeed = cpBigNumListGet(&pList); - /* interally generates SeedIn value */ + /* internally generates SeedIn value */ int seedBitSize = MIN_DLPDSA_SEEDSIZE; IppBool seed_is_random = ippTrue; diff --git a/sources/ippcp/pcpeccp.h b/sources/ippcp/pcpeccp.h index aa108544..311c9fd3 100644 --- a/sources/ippcp/pcpeccp.h +++ b/sources/ippcp/pcpeccp.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal ECC (prime) basic Definitions & Function Prototypes -// -// +// +// */ #if !defined(_NEW_PCP_ECCP_H) @@ -29,7 +29,7 @@ #include "pcpgfpecstuff.h" -__INLINE IppsBigNumState* cpConstructBN(IppsBigNumState* pBN, cpSize len, BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer) +__IPPCP_INLINE IppsBigNumState* cpConstructBN(IppsBigNumState* pBN, cpSize len, BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer) { BN_SET_ID(pBN); BN_SIGN(pBN) = ippBigNumPOS; @@ -164,7 +164,7 @@ extern const BNU_CHUNK_T h_secp384r1_p[]; extern const BNU_CHUNK_T h_secp521r1_p[]; extern const BNU_CHUNK_T h_tpmSM2_p256_p[]; -__INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pM, int ns, BNU_CHUNK_T* pBuffer) @@ -175,7 +175,7 @@ __INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, return pR; } -__INLINE BNU_CHUNK_T* cpModSub_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T* cpModSub_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pM, int ns, BNU_CHUNK_T* pBuffer) diff --git a/sources/ippcp/pcpgfpecessm2.h b/sources/ippcp/pcpgfpecessm2.h index fd56c876..5fd1a28a 100644 --- a/sources/ippcp/pcpgfpecessm2.h +++ b/sources/ippcp/pcpgfpecessm2.h @@ -54,7 +54,7 @@ struct _cpStateECES_SM2 { #define VALID_ECES_SM2_ID(stt) ((((stt)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((stt))) == (Ipp32u)idxCtxECES_SM2) /* get a byte, update 0-kdf status */ -__INLINE Ipp8u cpECES_SM2KdfNextByte(IppsECESState_SM2* pState) { +__IPPCP_INLINE Ipp8u cpECES_SM2KdfNextByte(IppsECESState_SM2* pState) { if (pState->kdfIndex == IPP_SM3_DIGEST_BITSIZE / BYTESIZE) { ++pState->kdfCounter; pState->kdfIndex = 0; diff --git a/sources/ippcp/pcpgfpecstuff.h b/sources/ippcp/pcpgfpecstuff.h index 79a593c9..4d432ae9 100644 --- a/sources/ippcp/pcpgfpecstuff.h +++ b/sources/ippcp/pcpgfpecstuff.h @@ -188,22 +188,22 @@ IPP_OWN_DECL (const cpPrecompAP*, gfpec_precom_sm2_radix52_fun, (void)) /* // 
get/release n points from/to the pool */ -__INLINE BNU_CHUNK_T* cpEcGFpGetPool(int n, IppsGFpECState* pEC) +__IPPCP_INLINE BNU_CHUNK_T* cpEcGFpGetPool(int n, IppsGFpECState* pEC) { BNU_CHUNK_T* pPool = ECP_POOL(pEC); ECP_POOL(pEC) += n*GFP_FELEN(GFP_PMA(ECP_GFP(pEC)))*3; return pPool; } -__INLINE void cpEcGFpReleasePool(int n, IppsGFpECState* pEC) +__IPPCP_INLINE void cpEcGFpReleasePool(int n, IppsGFpECState* pEC) { int chunk_size = n*GFP_FELEN(GFP_PMA(ECP_GFP(pEC)))*3; ECP_POOL(pEC) -= chunk_size; - // Clean the pool for the security reasons + // Clean the pool for the security reasons // (intermediate sensitive data may be stored here) - ZEXPAND_BNU(ECP_POOL(pEC), 0, chunk_size); + ZEXPAND_BNU(ECP_POOL(pEC), 0, chunk_size); } -__INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* pData, int flags, const IppsGFpECState* pEC) +__IPPCP_INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* pData, int flags, const IppsGFpECState* pEC) { ECP_POINT_SET_ID(pPoint); ECP_POINT_FLAGS(pPoint) = flags; @@ -213,7 +213,7 @@ __INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* p } /* copy one point into another */ -__INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpECPoint* pPointA, int elemLen) +__IPPCP_INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpECPoint* pPointA, int elemLen) { cpGFpElementCopy(ECP_POINT_DATA(pPointR), ECP_POINT_DATA(pPointA), 3*elemLen); ECP_POINT_FLAGS(pPointR) = ECP_POINT_FLAGS(pPointA); @@ -221,7 +221,7 @@ __INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpEC } -__INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) +__IPPCP_INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) { int elemLen = ECP_POINT_FELEN(pPoint); cpGFpElementPad(ECP_POINT_X(pPoint), elemLen, 0); @@ -235,7 +235,7 @@ __INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) // 
test infinity: // IsProjectivePointAtInfinity */ -__INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) +__IPPCP_INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) { return GFP_IS_ZERO( ECP_POINT_Z(pPoint), ECP_POINT_FELEN(pPoint)); } @@ -243,7 +243,7 @@ __INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) /* signed encode */ -__INLINE void booth_recode(Ipp8u* sign, Ipp8u* digit, Ipp8u in, int w) +__IPPCP_INLINE void booth_recode(Ipp8u* sign, Ipp8u* digit, Ipp8u in, int w) { Ipp8u s = (Ipp8u)(~((in >> w) - 1)); int d = (1 << (w+1)) - in - 1; @@ -288,7 +288,7 @@ IPP_OWN_DECL (int, gfec_MakePoint, (IppsGFpECPoint* pPoint, const BNU_CHUNK_T* p IPP_OWN_DECL (int, gfec_ComparePoint, (const IppsGFpECPoint* pP, const IppsGFpECPoint* pQ, IppsGFpECState* pEC)) IPP_OWN_DECL (int, gfec_IsPointOnCurve, (const IppsGFpECPoint* pP, IppsGFpECState* pEC)) -__INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, +__IPPCP_INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, const IppsGFpECPoint* pP, IppsGFpECState* pEC) { gfec_point_double(ECP_POINT_X(pR), ECP_POINT_X(pP), pEC); @@ -296,7 +296,7 @@ __INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, return pR; } -__INLINE IppsGFpECPoint* gfec_AddPoint(IppsGFpECPoint* pR, +__IPPCP_INLINE IppsGFpECPoint* gfec_AddPoint(IppsGFpECPoint* pR, const IppsGFpECPoint* pP, const IppsGFpECPoint* pQ, IppsGFpECState* pEC) { diff --git a/sources/ippcp/pcpgfpstuff.h b/sources/ippcp/pcpgfpstuff.h index d7adeb84..17aca720 100644 --- a/sources/ippcp/pcpgfpstuff.h +++ b/sources/ippcp/pcpgfpstuff.h @@ -89,24 +89,24 @@ typedef struct _cpGFp { #define cpGFpReleasePool(n, gfe) gsModPoolFree((gfe), (n)) -__INLINE int cpGFpElementLen(const BNU_CHUNK_T* pE, int nsE) +__IPPCP_INLINE int cpGFpElementLen(const BNU_CHUNK_T* pE, int nsE) { for(; nsE>1 && 0==pE[nsE-1]; nsE--) ; return nsE; } -__INLINE BNU_CHUNK_T* cpGFpElementCopy(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pE, int nsE) +__IPPCP_INLINE BNU_CHUNK_T* 
cpGFpElementCopy(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pE, int nsE) { int n; for(n=0; nadd(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpSub(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpSub(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->sub(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpNeg(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpNeg(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->neg(pR, pA, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpMul(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpMul(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->mul(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpSqr(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpSqr(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->sqr(pR, pA, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->div2(pR, pA, pGFE); } @@ -169,7 +169,7 @@ __INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEn /* construct GF element */ -__INLINE IppsGFpElement* cpGFpElementConstruct(IppsGFpElement* pR, BNU_CHUNK_T* pDataBufer, int ns) +__IPPCP_INLINE IppsGFpElement* cpGFpElementConstruct(IppsGFpElement* pR, BNU_CHUNK_T* pDataBufer, int ns) { GFPE_SET_ID(pR); GFPE_ROOM(pR) = ns; diff --git a/sources/ippcp/pcpgfpxinit.c b/sources/ippcp/pcpgfpxinit.c index 51e8bb9a..0b0e8430 100644 --- a/sources/ippcp/pcpgfpxinit.c +++ b/sources/ippcp/pcpgfpxinit.c @@ -14,10 +14,10 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Operations over GF(p) ectension. -// +// // Context: // pcpgfpxinit.c() // @@ -51,7 +51,7 @@ // (IPP_MIN_GF_EXTDEG==2, IPP_MAX_GF_EXTDEG==8) // 1>nElm || nElm>extDeg // -// cpID_Poly!=pGFpMethod->modulusID -- method does not refferenced to polynomial one +// cpID_Poly!=pGFpMethod->modulusID -- method does not reference the polynomial one // pGFpMethod->modulusBitDeg!=extDeg -- fixed method does not match to degree extension // // ippStsNoErr no error diff --git a/sources/ippcp/pcpgfpxinitbinomial.c b/sources/ippcp/pcpgfpxinitbinomial.c index 96dd34f1..062a4cf7 100644 --- a/sources/ippcp/pcpgfpxinitbinomial.c +++ b/sources/ippcp/pcpgfpxinitbinomial.c @@ -14,10 +14,10 @@ * limitations under the License. *************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Operations over GF(p) ectension. -// +// // Context: // pcpgfpxinitbinomial.c() // @@ -49,7 +49,7 @@ // ippStsBadArgErr IPP_MIN_GF_EXTDEG > extDeg || extDeg > IPP_MAX_GF_EXTDEG // (IPP_MIN_GF_EXTDEG==2, IPP_MAX_GF_EXTDEG==8) // -// cpID_Poly!=pGFpMethod->modulusID -- method does not refferenced to polynomial one +// cpID_Poly!=pGFpMethod->modulusID -- method does not reference the polynomial one // pGFpMethod->modulusBitDeg!=extDeg -- fixed method does not match to degree extension // // ippStsNoErr no error diff --git a/sources/ippcp/pcpgfpxmethod_binom_epid2.c b/sources/ippcp/pcpgfpxmethod_binom_epid2.c index 2798decf..0daaeda5 100644 --- a/sources/ippcp/pcpgfpxmethod_binom_epid2.c +++ b/sources/ippcp/pcpgfpxmethod_binom_epid2.c @@ -57,7 +57,7 @@ // The case is important in GF(((p^2)^3)^2) arithmetic for Intel(R) EPID 2.0. 
// */ -__INLINE BNU_CHUNK_T* cpFq6Mul_vi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) +__IPPCP_INLINE BNU_CHUNK_T* cpFq6Mul_vi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) { gsEngine* pGroundGFE = GFP_PARENT(pGFEx); int termLen = GFP_FELEN(pGroundGFE); @@ -250,7 +250,7 @@ static gsModMethod* gsPolyArith_binom2_epid2 (void) // // Purpose: Returns a reference to the implementation of arithmetic operations over GF(pd). // -// Returns: pointer to a structure containing +// Returns: pointer to a structure containing // an implementation of arithmetic operations over GF(pd) // g(x) = x^2 - a0, a0 from GF(q), a0 = 1 // g(w) = w^2 - V0, v0 from GF((q^2)^3), V0 = 0*s^2 + v + 0 diff --git a/sources/ippcp/pcpgfpxmethod_binom_epid2.h b/sources/ippcp/pcpgfpxmethod_binom_epid2.h index 2b097c98..1f492b71 100644 --- a/sources/ippcp/pcpgfpxmethod_binom_epid2.h +++ b/sources/ippcp/pcpgfpxmethod_binom_epid2.h @@ -57,7 +57,7 @@ // The case is important in GF((p^2)^3) arithmetic for Intel(R) EPID 2.0. // */ -__INLINE BNU_CHUNK_T* cpFq2Mul_xi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) +__IPPCP_INLINE BNU_CHUNK_T* cpFq2Mul_xi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) { gsEngine* pGroundGFE = GFP_PARENT(pGFEx); mod_mul addF = GFP_METHOD(pGroundGFE)->add; diff --git a/sources/ippcp/pcpgfpxstuff.h b/sources/ippcp/pcpgfpxstuff.h index b5d63881..d892e3c5 100644 --- a/sources/ippcp/pcpgfpxstuff.h +++ b/sources/ippcp/pcpgfpxstuff.h @@ -14,11 +14,11 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippCP) // GF(p) extension internal -// +// */ #if !defined(_PCP_GFPEXT_H_) @@ -35,7 +35,7 @@ #define GFPX_IDX_ELEMENT(pxe, idx, eleSize) ((pxe)+(eleSize)*(idx)) -__INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) +__IPPCP_INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) { int groundElemLen = GFP_FELEN(GFP_PARENT(pGFEx)); int deg; @@ -45,14 +45,14 @@ __INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) return deg; } -__INLINE gsModEngine* cpGFpBasic(const gsModEngine* pGFEx) +__IPPCP_INLINE gsModEngine* cpGFpBasic(const gsModEngine* pGFEx) { while( !GFP_IS_BASIC(pGFEx) ) { pGFEx = GFP_PARENT(pGFEx); } return (gsModEngine*)pGFEx; } -__INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) { int degree = GFP_EXTDEGREE(pGFEx); while( !GFP_IS_BASIC(pGFEx) ) { @@ -65,7 +65,7 @@ __INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) /* convert external data (Ipp32u) => internal element (BNU_CHUNK_T) representation returns length of element (in BNU_CHUNK_T) */ -__INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, const gsModEngine* pGFEx) { gsModEngine* pBasicGFE = cpGFpBasic(pGFEx); int basicExtension = cpGFpBasicDegreeExtension(pGFEx); @@ -84,7 +84,7 @@ __INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, con /* convert internal element (BNU_CHUNK_T) => external data (Ipp32u) representation returns length of data (in Ipp32u) */ -__INLINE int cpGFpxCopyFromChunk(Ipp32u* pA, const BNU_CHUNK_T* pElm, const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpxCopyFromChunk(Ipp32u* pA, const BNU_CHUNK_T* pElm, const gsModEngine* 
pGFEx) { gsModEngine* pBasicGFE = cpGFpBasic(pGFEx); int basicExtension = cpGFpBasicDegreeExtension(pGFEx); diff --git a/sources/ippcp/pcphash.h b/sources/ippcp/pcphash.h index bf24edf7..dcc1ba75 100644 --- a/sources/ippcp/pcphash.h +++ b/sources/ippcp/pcphash.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Security Hash Standard // Internal Definitions and Internal Functions Prototypes -// -// +// +// */ #if !defined(_PCP_HASH_H) @@ -183,19 +183,19 @@ extern const Ipp8u* cpHashIV[]; extern const cpHashAttr cpHashAlgAttr[]; /* IV size helper */ -__INLINE int cpHashIvSize(IppHashAlgId algID) +__IPPCP_INLINE int cpHashIvSize(IppHashAlgId algID) { return cpHashAlgAttr[algID].ivSize; } /* hash size helper */ -__INLINE int cpHashSize(IppHashAlgId algID) +__IPPCP_INLINE int cpHashSize(IppHashAlgId algID) { return cpHashAlgAttr[algID].hashSize; } /* message block size helper */ -__INLINE int cpHashMBS(IppHashAlgId algID) +__IPPCP_INLINE int cpHashMBS(IppHashAlgId algID) { return cpHashAlgAttr[algID].msgBlkSize; } /* maps algID into enabled IppHashAlgId value */ -__INLINE IppHashAlgId cpValidHashAlg(IppHashAlgId algID) +__IPPCP_INLINE IppHashAlgId cpValidHashAlg(IppHashAlgId algID) { /* maps algID into the valid range */ algID = (((int)ippHashAlg_Unknown < (int)algID) && ((int)algID < (int)ippHashAlg_MaxNo))? algID : ippHashAlg_Unknown; diff --git a/sources/ippcp/pcphashcnt.c b/sources/ippcp/pcphashcnt.c index 4456b452..7bd0b8de 100644 --- a/sources/ippcp/pcphashcnt.c +++ b/sources/ippcp/pcphashcnt.c @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Security Hash Standard // Constants -// -// +// +// */ #include "owndefs.h" @@ -219,7 +219,7 @@ const Ipp8u* cpHashIV[] = { //////////////////////////////////////////////////////////// /* -// additive constatns +// additive constants */ #if defined(_ENABLE_ALG_SHA1_) __ALIGN16 const Ipp32u SHA1_cnt[] = { diff --git a/sources/ippcp/pcphashsha1px.c b/sources/ippcp/pcphashsha1px.c index 34b9fedd..31b7476c 100644 --- a/sources/ippcp/pcphashsha1px.c +++ b/sources/ippcp/pcphashsha1px.c @@ -14,16 +14,16 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Message block processing according to SHA1 -// +// // Contents: // UpdateSHA1() -// -// +// +// */ #include "owndefs.h" @@ -62,7 +62,7 @@ } #if defined(_ALG_SHA1_COMPACT_) -__INLINE Ipp32u MagicFun(int s, Ipp32u b, Ipp32u c, Ipp32u d) +__IPPCP_INLINE Ipp32u MagicFun(int s, Ipp32u b, Ipp32u c, Ipp32u d) { switch(s) { case 0: return MAGIC_F0(b,c,d); diff --git a/sources/ippcp/pcphashsm3px.c b/sources/ippcp/pcphashsm3px.c index c5392180..e8267039 100644 --- a/sources/ippcp/pcphashsm3px.c +++ b/sources/ippcp/pcphashsm3px.c @@ -14,16 +14,16 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Message block processing according to SM5 -// +// // Contents: // UpdateSM3() -// -// +// +// */ #include "owndefs.h" @@ -141,14 +141,14 @@ *F*/ #if defined(_ALG_SM3_COMPACT_) -__INLINE Ipp32u MagicFF(int s, Ipp32u a, Ipp32u b, Ipp32u c) +__IPPCP_INLINE Ipp32u MagicFF(int s, Ipp32u a, Ipp32u b, Ipp32u c) { switch(s) { case 0: return FF1(a,b,c); default:return FF2(a,b,c); } } -__INLINE Ipp32u MagicGG(int s, Ipp32u e, Ipp32u f, Ipp32u g) +__IPPCP_INLINE Ipp32u MagicGG(int s, Ipp32u e, Ipp32u f, Ipp32u g) { switch(s) { case 0: return GG1(e,f,g); diff --git a/sources/ippcp/pcphashupdate.c b/sources/ippcp/pcphashupdate.c index eb2b5642..940a3079 100644 --- a/sources/ippcp/pcphashupdate.c +++ b/sources/ippcp/pcphashupdate.c @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Security Hash Standard // General Functionality -// +// // Contents: // ippsHashUpdate() // @@ -50,7 +50,7 @@ // pState pointer to the Hash context // *F*/ -__INLINE int IsExceedMsgLen(Ipp64u maxLo, Ipp64u maxHi, Ipp64u lenLo, Ipp64u lenHi) +__IPPCP_INLINE int IsExceedMsgLen(Ipp64u maxLo, Ipp64u maxHi, Ipp64u lenLo, Ipp64u lenHi) { int isExceed = lenLo > maxLo; isExceed = (lenHi+(Ipp64u)isExceed) > maxHi; diff --git a/sources/ippcp/pcpmask_ct.h b/sources/ippcp/pcpmask_ct.h index 85245e23..0fa80c58 100644 --- a/sources/ippcp/pcpmask_ct.h +++ b/sources/ippcp/pcpmask_ct.h @@ -81,7 +81,7 @@ static __NOINLINE BNU_CHUNK_T cpIsMsb_ct(BNU_CHUNK_T a) #else /* replace under mask: dst[] = replaceFlag? 
src[] : dst[] */ -__INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T replaceMask) +__IPPCP_INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T replaceMask) { BNU_CHUNK_T dstMask = ~replaceMask; int n; @@ -90,7 +90,7 @@ __INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int l } /* copy under mask: dst[] = src1[] & mask) ^ src2[] & ~mask */ -__INLINE void cpMaskedCopyBNU_ct(BNU_CHUNK_T* dst, BNU_CHUNK_T mask, const BNU_CHUNK_T* src1, const BNU_CHUNK_T* src2, int len) +__IPPCP_INLINE void cpMaskedCopyBNU_ct(BNU_CHUNK_T* dst, BNU_CHUNK_T mask, const BNU_CHUNK_T* src1, const BNU_CHUNK_T* src2, int len) { int i; for(i=0; i> (sizeof(a) * 8 - 1)); } @@ -110,43 +110,43 @@ __INLINE BNU_CHUNK_T cpIsMsb_ct(BNU_CHUNK_T a) #endif /* tests if LSB(a)==1 */ -__INLINE BNU_CHUNK_T cpIsLsb_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsLsb_ct(BNU_CHUNK_T a) { return (BNU_CHUNK_T)0 - (a & 1); } /* tests if a is odd */ -__INLINE BNU_CHUNK_T cpIsOdd_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsOdd_ct(BNU_CHUNK_T a) { return cpIsLsb_ct(a); } /* tests if a is even */ -__INLINE BNU_CHUNK_T cpIsEven_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsEven_ct(BNU_CHUNK_T a) { return ~cpIsLsb_ct(a); } /* tests if a==0 */ -__INLINE BNU_CHUNK_T cpIsZero_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsZero_ct(BNU_CHUNK_T a) { return cpIsMsb_ct(~a & (a - 1)); } /* tests if a==b */ -__INLINE BNU_CHUNK_T cpIsEqu_ct(BNU_CHUNK_T a, BNU_CHUNK_T b) +__IPPCP_INLINE BNU_CHUNK_T cpIsEqu_ct(BNU_CHUNK_T a, BNU_CHUNK_T b) { return cpIsZero_ct(a ^ b); } /* test if ared(pR, pProduct, pModEngine); } -__INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pModEngine) @@ -79,7 +79,7 @@ __INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, MOD_METHOD( pModEngine )->mul(pR, pA, pB, pModEngine); } -__INLINE 
cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB, gsModEngine* pModEngine) @@ -100,14 +100,14 @@ __INLINE cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, return nsM; } -__INLINE void cpMontSqr_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontSqr_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pModEngine) { MOD_METHOD( pModEngine )->sqr(pR, pA, pModEngine); } -__INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, gsModEngine* pModEngine) { @@ -120,7 +120,7 @@ __INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, /* // Montgomery encoding/decoding */ -__INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, gsModEngine* pModEngine) { @@ -132,7 +132,7 @@ __INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, cpSize nsX, gsModEngine* pModEngine) { @@ -147,7 +147,7 @@ __INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXmont, cpSize nsX, gsModEngine* pModEngine) { @@ -161,7 +161,7 @@ __INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE void cpMontMul_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontMul_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, const IppsBigNumState* pYbn, gsModEngine* pModEngine) @@ -176,7 +176,7 @@ __INLINE void cpMontMul_BN(IppsBigNumState* pRbn, BN_SIGN(pRbn) = ippBigNumPOS; } -__INLINE void cpMontEnc_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontEnc_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pModEngine) { @@ -188,7 +188,7 @@ __INLINE void 
cpMontEnc_BN(IppsBigNumState* pRbn, BN_SIGN(pRbn) = ippBigNumPOS; } -__INLINE void cpMontDec_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontDec_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pModEngine) { @@ -207,7 +207,7 @@ __INLINE void cpMontDec_BN(IppsBigNumState* pRbn, #define cpMontExpBin_BNU_sscm OWNAPI(cpMontExpBin_BNU_sscm) IPP_OWN_DECL (cpSize, cpMontExpBin_BNU_sscm, (BNU_CHUNK_T* pY, const BNU_CHUNK_T* pX, cpSize nsX, const BNU_CHUNK_T* pE, cpSize nsE, gsModEngine* pModEngine)) -__INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, +__IPPCP_INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, const IppsBigNumState* pXbn, const IppsBigNumState* pEbn, gsModEngine* pMont) @@ -223,7 +223,7 @@ __INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, BN_SIGN(pYbn) = ippBigNumPOS; } -__INLINE void cpMontExpBin_BN(IppsBigNumState* pYbn, +__IPPCP_INLINE void cpMontExpBin_BN(IppsBigNumState* pYbn, const IppsBigNumState* pXbn, const IppsBigNumState* pEbn, gsModEngine* pModEngine) diff --git a/sources/ippcp/pcpmontred.h b/sources/ippcp/pcpmontred.h index 74ebef75..2ef13268 100644 --- a/sources/ippcp/pcpmontred.h +++ b/sources/ippcp/pcpmontred.h @@ -14,10 +14,10 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippcp) -// +// */ #if !defined(_CP_MONTRED_H) #define _CP_MONTRED_H @@ -34,7 +34,7 @@ #define cpMontRedAdx_BNU OWNAPI(cpMontRedAdx_BNU) IPP_OWN_DECL (void, cpMontRedAdx_BNU, (BNU_CHUNK_T* pR, BNU_CHUNK_T* pProduct, const BNU_CHUNK_T* pModulus, cpSize nsM, BNU_CHUNK_T m0)) -__INLINE void cpMontRed_BNU_opt(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontRed_BNU_opt(BNU_CHUNK_T* pR, BNU_CHUNK_T* pProduct, const BNU_CHUNK_T* pModulus, cpSize nsM, BNU_CHUNK_T m0) { diff --git a/sources/ippcp/pcpngmontexpstuff.h b/sources/ippcp/pcpngmontexpstuff.h index c2943ab9..1c924d86 100644 --- a/sources/ippcp/pcpngmontexpstuff.h +++ b/sources/ippcp/pcpngmontexpstuff.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Internal Definitions and // Internal ng RSA Function Prototypes -// -// +// +// */ #if !defined(_CP_NG_MONT_EXP_STUFF_H) @@ -35,7 +35,7 @@ /* // optimal size of fixed window exponentiation */ -__INLINE cpSize gsMontExp_WinSize(cpSize bitsize) +__IPPCP_INLINE cpSize gsMontExp_WinSize(cpSize bitsize) { #if defined(_USE_WINDOW_EXP_) // new computations @@ -56,7 +56,7 @@ __INLINE cpSize gsMontExp_WinSize(cpSize bitsize) /* // Montgomery encoding/decoding */ -__INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, cpSize nsX, const gsModEngine* pMont) { @@ -66,7 +66,7 @@ __INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXmont, gsModEngine* pMont) { @@ -75,7 +75,7 @@ __INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE void gsMontEnc_BN(IppsBigNumState* pRbn, 
+__IPPCP_INLINE void gsMontEnc_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pMont) { diff --git a/sources/ippcp/pcpngmontexpstuff_avx2.c b/sources/ippcp/pcpngmontexpstuff_avx2.c index ddc88bcd..6fb10cdd 100644 --- a/sources/ippcp/pcpngmontexpstuff_avx2.c +++ b/sources/ippcp/pcpngmontexpstuff_avx2.c @@ -99,7 +99,7 @@ static int dig27_regular(Ipp32u* pRegular, int regLen, const Ipp64u* pRep27, int } /* mont_mul wrapper */ -__INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) +__IPPCP_INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) { if(mLen==38) /* corresponds to 1024-bit regular representation */ cpMontMul1024_avx2(pR, pA, pB, pModulus, mLen, k0); @@ -115,7 +115,7 @@ __INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, con } /* mont_sqr wrapper */ -__INLINE void cpMontSqr_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) +__IPPCP_INLINE void cpMontSqr_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) { if(mLen==38) /* corresponds to 1024-bit regular representation */ cpMontSqr1024_avx2(pR, pA, pModulus, mLen, k0, pBuffer); @@ -384,7 +384,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpBin_BNU_sscm_avx2, (BNU_CHUNK_T* dataY, const BNU // "fast" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and ippsRSA_InitPublicKey). 
Buffer "redE" assigned for copy of dataE, is 1 (64-bit) chunk longer than size of RSA modulus, @@ -538,7 +538,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpWin_BNU_avx2, (BNU_CHUNK_T* dataY, const BNU_CHUN // "safe" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and ippsRSA_InitPublicKey). Buffer "redE" assigned for copy of dataE, is 1 (64-bit) chunk longer than size of RSA modulus, @@ -943,7 +943,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpWin_BNU_avx512, (BNU_CHUNK_T* dataY, const BNU_CH // "safe" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1<0; strLen--) { @@ -109,7 +109,7 @@ static void regular_dig52(Ipp64u* out, int outLen /* in qwords */, const Ipp64u* converts "redundant" (base = 2^DIGIT_SIZE) representation into regular (base = 2^64) */ -__INLINE void putDig52(Ipp8u* pStr, int strLen, Ipp64u digit) +__IPPCP_INLINE void putDig52(Ipp8u* pStr, int strLen, Ipp64u digit) { for(; strLen>0; strLen--) { *pStr++ = (Ipp8u)(digit&0xFF); diff --git a/sources/ippcp/pcpngmontexpstuff_sse2.c b/sources/ippcp/pcpngmontexpstuff_sse2.c index 36bfb895..c53b3fa1 100644 --- a/sources/ippcp/pcpngmontexpstuff_sse2.c +++ b/sources/ippcp/pcpngmontexpstuff_sse2.c @@ -100,7 +100,7 @@ static int dig27_regular(Ipp32u* pRegular, int regLen, const Ipp64u* pRep27, int /* normalize "redundant" representation (pUnorm, len) into (pNorm, len) - and returns extansion + and returns extension */ static Ipp64u cpDigit27_normalize(Ipp64u* pNorm, const Ipp64u* pUnorm, int len) { @@ -643,7 +643,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpBin_BNU_sscm_sse2, (BNU_CHUNK_T* dataY, const BNU // "fast" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and 
ippsRSA_InitPublicKey). Buffer "dataEE" assigned for copy of dataExp, is 1 (64-bit) chunk longer than size of RSA modulus, diff --git a/sources/ippcp/pcpngmontexpstuff_win_sscm.c b/sources/ippcp/pcpngmontexpstuff_win_sscm.c index 4ab78311..d1b25966 100644 --- a/sources/ippcp/pcpngmontexpstuff_win_sscm.c +++ b/sources/ippcp/pcpngmontexpstuff_win_sscm.c @@ -14,8 +14,8 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Modular Exponentiation (windowed "safe" version) @@ -36,7 +36,7 @@ // - possible inplace mode // // scratch buffer structure: -// precomuted table of multipliers[(1<=_IPP_G9) || (_IPP32E>=_IPP32E_E9)) -__INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) +__IPPCP_INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) { #define LOCAL_COUNTER (8) int n; @@ -54,7 +54,7 @@ __INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) } #if (_IPP32E>=_IPP32E_E9) -__INLINE int cpRand_hw_sample32(Ipp32u* pSample) +__IPPCP_INLINE int cpRand_hw_sample32(Ipp32u* pSample) { #define LOCAL_COUNTER (8) int n; @@ -81,7 +81,7 @@ __INLINE int cpRand_hw_sample32(Ipp32u* pSample) // bufLen buffer length *F*/ -__INLINE int cpRandHW_buffer(Ipp32u* pBuffer, int bufLen) +__IPPCP_INLINE int cpRandHW_buffer(Ipp32u* pBuffer, int bufLen) { int nSamples = bufLen/((Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u))); diff --git a/sources/ippcp/pcprij128safe.h b/sources/ippcp/pcprij128safe.h index 5af882a4..91a7aba6 100644 --- a/sources/ippcp/pcprij128safe.h +++ b/sources/ippcp/pcprij128safe.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal Safe Rijndael Encrypt, Decrypt -// -// +// +// */ #if !defined(_PCP_RIJ_SAFE_H) @@ -54,7 +54,7 @@ IPP_OWN_DECL (void, TransformComposite2Native, (Ipp8u out[16], const Ipp8u inp[16])) /* add round key operation */ -__INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[16]) +__IPPCP_INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[16]) { ((Ipp64u*)out)[0] = ((Ipp64u*)inp)[0] ^ ((Ipp64u*)rkey)[0]; ((Ipp64u*)out)[1] = ((Ipp64u*)inp)[1] ^ ((Ipp64u*)rkey)[1]; @@ -63,7 +63,7 @@ __INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[1 /* add logs of GF(2^4) elements // the exp table has been build matched for that implementation */ -__INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) +__IPPCP_INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) { //Ipp8u s = loga+logb; //return (s>2*14)? 15 : (s>14)? s-15 : s; @@ -77,7 +77,7 @@ __INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) #define SELECTION_BITS ((sizeof(BNU_CHUNK_T)/sizeof(Ipp8u)) -1) -__INLINE Ipp8u getSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const Ipp8u* SboxEntry = RijEncSbox; diff --git a/sources/ippcp/pcprij128safe2.h b/sources/ippcp/pcprij128safe2.h index 9f893730..2b90b02a 100644 --- a/sources/ippcp/pcprij128safe2.h +++ b/sources/ippcp/pcprij128safe2.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal Safe Rijndael Encrypt, Decrypt -// -// +// +// */ #if !defined(_PCP_RIJ_SAFE2_H) @@ -48,7 +48,7 @@ (out)[11] = (inp)[14]; \ (out)[15] = (inp)[15] -__INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) +__IPPCP_INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) { state[0] ^= RoundKey[0]; state[1] ^= RoundKey[1]; @@ -57,13 +57,13 @@ __INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) } // xtime is a macro that finds the product of {02} and the argument to xtime modulo {1b} -__INLINE Ipp32u mask4(Ipp32u x) +__IPPCP_INLINE Ipp32u mask4(Ipp32u x) { x &= 0x80808080; return (Ipp32u)((x<<1) - (x>>7)); } -__INLINE Ipp32u xtime4(Ipp32u x) +__IPPCP_INLINE Ipp32u xtime4(Ipp32u x) { Ipp32u t = (x+x) &0xFEFEFEFE; t ^= mask4(x) & 0x1B1B1B1B; diff --git a/sources/ippcp/pcprij128safedec2pxca.c b/sources/ippcp/pcprij128safedec2pxca.c index 8fc6148c..bf49d814 100644 --- a/sources/ippcp/pcprij128safedec2pxca.c +++ b/sources/ippcp/pcprij128safedec2pxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Decrypt 128-bit data block according to Rijndael // (compact S-box based implementation) -// +// // Contents: // Safe2Decrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -41,7 +41,7 @@ #define SELECTION_BITS ((sizeof(BNU_CHUNK_T)/sizeof(Ipp8u)) -1) #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) -__INLINE Ipp8u getInvSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getInvSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const BNU_CHUNK_T* SboxEntry = (BNU_CHUNK_T*)RijDecSbox; @@ -58,7 +58,7 @@ __INLINE Ipp8u getInvSboxValue(Ipp8u x) #else #include "pcpmask_ct.h" -__INLINE Ipp8u getInvSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getInvSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const BNU_CHUNK_T* SboxEntry = (BNU_CHUNK_T*)RijDecSbox; @@ -74,21 +74,21 @@ __INLINE Ipp8u getInvSboxValue(Ipp8u x) } #endif -__INLINE void invSubBytes(Ipp8u state[]) +__IPPCP_INLINE void invSubBytes(Ipp8u state[]) { int i; for(i=0;i<16;i++) state[i] = getInvSboxValue(state[i]); } -__INLINE void invShiftRows(Ipp32u* state) +__IPPCP_INLINE void invShiftRows(Ipp32u* state) { state[1] = ROR32(state[1], 24); state[2] = ROR32(state[2], 16); state[3] = ROR32(state[3], 8); } -__INLINE void invMixColumns(Ipp32u* state) +__IPPCP_INLINE void invMixColumns(Ipp32u* state) { Ipp32u y0 = state[1] ^ state[2] ^ state[3]; Ipp32u y1 = state[0] ^ state[2] ^ state[3]; diff --git a/sources/ippcp/pcprij128safeenc2pxca.c b/sources/ippcp/pcprij128safeenc2pxca.c index 73b57c4b..ef083f7d 100644 --- a/sources/ippcp/pcprij128safeenc2pxca.c +++ b/sources/ippcp/pcprij128safeenc2pxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Encrypt 128-bit data block according to Rijndael // (compact S-box based implementation) -// +// // Contents: // Safe2Encrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -37,7 +37,7 @@ #include "pcprij128safe2.h" #include "pcprijtables.h" -__INLINE void SubBytes(Ipp8u state[]) +__IPPCP_INLINE void SubBytes(Ipp8u state[]) { int i; for(i=0;i<16;i++) { @@ -46,7 +46,7 @@ __INLINE void SubBytes(Ipp8u state[]) } -__INLINE void ShiftRows(Ipp32u* state) +__IPPCP_INLINE void ShiftRows(Ipp32u* state) { state[1] = ROR32(state[1], 8); state[2] = ROR32(state[2], 16); @@ -54,7 +54,7 @@ __INLINE void ShiftRows(Ipp32u* state) } // MixColumns4 function mixes the columns of the state matrix -__INLINE void MixColumns(Ipp32u* state) +__IPPCP_INLINE void MixColumns(Ipp32u* state) { Ipp32u y0 = state[1] ^ state[2] ^ state[3]; Ipp32u y1 = state[0] ^ state[2] ^ state[3]; diff --git a/sources/ippcp/pcprij128safeencpxca.c b/sources/ippcp/pcprij128safeencpxca.c index 0b1191b2..abaae489 100644 --- a/sources/ippcp/pcprij128safeencpxca.c +++ b/sources/ippcp/pcprij128safeencpxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Encrypt 128-bit data block according to Rijndael // (It's the special free from Sbox/tables implementation) -// +// // Contents: // SafeEncrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -261,7 +261,7 @@ static void FwdSubByte(Ipp8u blk[16]) /* inplace ShifttRows operation */ /* int ShiftRowsInx[] = {0,5,10,15, 4,9,14,3, 8,13,2,7, 12,1,6,11}; */ -__INLINE void FwdShiftRows(Ipp8u blk[16]) +__IPPCP_INLINE void FwdShiftRows(Ipp8u blk[16]) { Ipp8u x = blk[1]; blk[1] = blk[5]; diff --git a/sources/ippcp/pcprijkeysca.c b/sources/ippcp/pcprijkeysca.c index 31487e1e..10daf549 100644 --- a/sources/ippcp/pcprijkeysca.c +++ b/sources/ippcp/pcprijkeysca.c @@ -14,17 +14,17 @@ * limitations under the License. 
*************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Initialization of Rijndael -// +// // Contents: // EncRijndaelKeys() // DecRijndaelKeys() -// -// +// +// */ #include "owndefs.h" @@ -143,7 +143,7 @@ static const Ipp32u RconTbl[] = { /// commented due to mitigation // -///* precomputed table for InvMixColumn() operation */ +///* precomputed table for InvMixColumn() operation */ //static const Ipp32u InvMixCol_Tbl[4][256] = { // { LINE(inv_t0) }, // { LINE(inv_t1) }, @@ -157,7 +157,7 @@ static const Ipp32u RconTbl[] = { // ^(tbl)[2][ EBYTE((x),2) ] \ // ^(tbl)[3][ EBYTE((x),3) ] ) -__INLINE Ipp32u InvMixColumn(Ipp32u x) +__IPPCP_INLINE Ipp32u InvMixColumn(Ipp32u x) { Ipp32u x_mul_2 = xtime4(x); Ipp32u x_mul_4 = xtime4(x_mul_2); @@ -193,7 +193,7 @@ IPP_OWN_DEFN (void, ExpandRijndaelKey, (const Ipp8u* pKey, int NK, int NB, int N Ipp32u k3 = enc_keys[3]; for(n=NK128; n> 8) & 0xFF) <<8); @@ -107,12 +107,12 @@ __INLINE Ipp32u cpSboxT_SMS4(Ipp32u x) - linear Linear - mixer Mix (permutation T in the SMS4 standard phraseology) */ -__INLINE Ipp32u cpExpKeyLinear_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpExpKeyLinear_SMS4(Ipp32u x) { return x^ROL32(x,13)^ROL32(x,23); } -__INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) { return cpExpKeyLinear_SMS4( cpSboxT_SMS4(x) ); } @@ -121,12 +121,12 @@ __INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) - linear Linear - mixer Mix (permutation T in the SMS4 standard phraseology) */ -__INLINE Ipp32u cpCipherLinear_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpCipherLinear_SMS4(Ipp32u x) { return x^ROL32(x,2)^ROL32(x,10)^ROL32(x,18)^ROL32(x,24); } -__INLINE Ipp32u cpCipherMix_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpCipherMix_SMS4(Ipp32u x) { return cpCipherLinear_SMS4( cpSboxT_SMS4(x) ); } diff --git a/sources/ippcp/pcpsms4_ccmstart.c b/sources/ippcp/pcpsms4_ccmstart.c index c1da3930..8b3fcc74 100644 --- 
a/sources/ippcp/pcpsms4_ccmstart.c +++ b/sources/ippcp/pcpsms4_ccmstart.c @@ -18,7 +18,7 @@ // Purpose: // Cryptography Primitive. // SMS4-CCM implementation. -// +// // Content: // ippsSMS4_CCMStart() // @@ -32,7 +32,7 @@ /*F* // Name: ippsSMS4_CCMStart // -// Purpose: Start the process (encryption+generation) or (decryption+veryfication). +// Purpose: Start the process (encryption+generation) or (decryption+verification). // // Returns: Reason: // ippStsNullPtrErr pCtx == NULL diff --git a/sources/ippcp/pcpsms4_ctr_gfni.c b/sources/ippcp/pcpsms4_ctr_gfni.c index bc41f31a..e1520f17 100644 --- a/sources/ippcp/pcpsms4_ctr_gfni.c +++ b/sources/ippcp/pcpsms4_ctr_gfni.c @@ -42,12 +42,12 @@ #include "pcpsms4_gfni.h" -static __ALIGN32 Ipp8u endiannes_swap[] = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, +static __ALIGN32 Ipp8u endianness_swap[] = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static __ALIGN32 Ipp8u endiannes[] = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, +static __ALIGN32 Ipp8u endianness[] = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; @@ -64,7 +64,7 @@ static __ALIGN16 Ipp8u next_inc[] = {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, static __ALIGN16 Ipp8u one128[] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; -__INLINE __m512i inc512(__m512i x, Ipp8u* increment) +__IPPCP_INLINE __m512i inc512(__m512i x, Ipp8u* increment) { __m512i t = _mm512_add_epi64(x, M512(increment)); __mmask8 carryMask = _mm512_cmplt_epu64_mask(t, x); @@ -74,7 +74,7 @@ __INLINE __m512i inc512(__m512i x, Ipp8u* increment) return t; } -__INLINE __m128i inc128(__m128i x) +__IPPCP_INLINE __m128i inc128(__m128i x) { __m128i t = _mm_add_epi64(x, M128(one128)); x = _mm_cmpeq_epi64(t, _mm_setzero_si128()); @@ -88,9 +88,9 @@ static int cpSMS4_CTR_gfni512x32(Ipp8u* 
pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); static int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); -static +static int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); -static +static int cpSMS4_CTR_gfni128x8(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); static int cpSMS4_ECB_gfni128x4(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); @@ -113,94 +113,94 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, // TMP[22] - ctrUnch TMP[20] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[21] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[21] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endiannes)); + TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endianness)); /* read string mask and convert to numerical */ - TMP[21] = _mm512_shuffle_epi8(TMP[21], M512(endiannes)); + TMP[21] = _mm512_shuffle_epi8(TMP[21], M512(endianness)); /* upchanged counter bits */ TMP[22] = _mm512_andnot_si512(TMP[21], TMP[20]); - + /* first incremention */ TMP[20] = inc512(TMP[20], first_inc); - + TMP[20] = _mm512_and_si512(TMP[21], TMP[20]); for (n = 0; n < processedLen; n += (64 * MBS_SMS4), pInp += (64 * MBS_SMS4), pOut += (64 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = 
_mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = 
_mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[12], TMP[13], TMP[14], TMP[15], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[16], TMP[17], TMP[18], TMP[19], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ 
TMP[3] = TMP[2] = TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -303,7 +303,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, TMP[19] = _mm512_xor_si512(_mm512_xor_si512(TMP[19], TMP[3]), L512(TMP[3])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -350,7 +350,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, /* Save counter */ TMP[20] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[20], TMP[21])); - TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endiannes)); + TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[20])); /* clear secret data */ @@ -359,7 +359,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x48(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -386,76 +386,76 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[18] - ctrUnch TMP[16] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[17] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[17] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endiannes)); + TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endianness)); /* read string mask and convert to numerical */ - TMP[17] = _mm512_shuffle_epi8(TMP[17], M512(endiannes)); + TMP[17] = _mm512_shuffle_epi8(TMP[17], M512(endianness)); /* upchanged counter bits */ TMP[18] = _mm512_andnot_si512(TMP[17], TMP[16]); - + /* first incremention */ TMP[16] = inc512(TMP[16], first_inc); - + TMP[16] = _mm512_and_si512(TMP[17], TMP[16]); for (n = 0; n < processedLen; n += (48 * MBS_SMS4), pInp += (48 * MBS_SMS4), pOut += (48 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[16]; TMP[1] = 
inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[16]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = 
_mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[16]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[12], TMP[13], TMP[14], TMP[15], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ TMP[2] = TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -538,7 +538,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[15] = _mm512_xor_si512(_mm512_xor_si512(TMP[15], TMP[2]), L512(TMP[2])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -575,7 +575,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[16] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[16], TMP[17])); - TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endiannes)); + TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[16])); /* 
clear secret data */ @@ -584,7 +584,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x32(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -611,59 +611,59 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[14] - ctrUnch TMP[12] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[13] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[13] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endiannes)); + TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endianness)); /* read string mask and convert to numerical */ - TMP[13] = _mm512_shuffle_epi8(TMP[13], M512(endiannes)); + TMP[13] = _mm512_shuffle_epi8(TMP[13], M512(endianness)); /* upchanged counter bits */ TMP[14] = _mm512_andnot_si512(TMP[13], TMP[12]); - + /* first incremention */ TMP[12] = inc512(TMP[12], first_inc); - + TMP[12] = _mm512_and_si512(TMP[13], TMP[12]); for (n = 0; n < processedLen; n += (32 * MBS_SMS4), pInp += (32 * MBS_SMS4), pOut += (32 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[12]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[12] = inc512(TMP[3], next_inc); + TMP[12] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[0], TMP[13])); TMP[1] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[1], TMP[13])); TMP[2] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[2], TMP[13])); TMP[3] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[3], TMP[13])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = 
_mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[12]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[12] = inc512(TMP[3], next_inc); + TMP[12] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[0], TMP[13])); TMP[1] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[1], TMP[13])); TMP[2] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[2], TMP[13])); TMP[3] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[3], TMP[13])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -726,7 +726,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[11] = _mm512_xor_si512(_mm512_xor_si512(TMP[11], TMP[1]), L512(TMP[1])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -753,7 +753,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[12] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[12], TMP[13])); - 
TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endiannes)); + TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[12])); /* clear secret data */ @@ -762,7 +762,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x16(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -789,40 +789,40 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[10] - ctrUnch TMP[8] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[9] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[9] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endiannes)); + TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endianness)); /* read string mask and convert to numerical */ - TMP[9] = _mm512_shuffle_epi8(TMP[9], M512(endiannes)); + TMP[9] = _mm512_shuffle_epi8(TMP[9], M512(endianness)); /* upchanged counter bits */ TMP[10] = _mm512_andnot_si512(TMP[9], TMP[8]); - + /* first incremention */ TMP[8] = inc512(TMP[8], first_inc); - + TMP[8] = _mm512_and_si512(TMP[9], TMP[8]); for (n = 0; n < processedLen; n += (16 * MBS_SMS4), pInp += (16 * MBS_SMS4), pOut += (16 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[8]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[8] = inc512(TMP[3], next_inc); + TMP[8] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[0], TMP[9])); TMP[1] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[1], TMP[9])); TMP[2] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[2], TMP[9])); TMP[3] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[3], TMP[9])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], 
M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); for (itr = 0; itr < 8; itr++, pRKey += 4) { @@ -883,7 +883,7 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[8] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[8], TMP[9])); - TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endiannes)); + TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[8])); /* clear secret data */ @@ -892,7 +892,7 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni128x12(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -904,14 +904,14 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // 12*MBS_SMS4 processing */ -static +static int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr) { int processedLen = len - (len % (12 * MBS_SMS4)); int n; if(processedLen){ - + __ALIGN16 __m128i TMP[22]; // TMP[15] - ctr @@ -921,8 +921,8 @@ int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[15] = _mm_loadu_si128((__m128i*)pCtr); TMP[16] = _mm_loadu_si128((__m128i*)pCtrMask); - TMP[16] = _mm_shuffle_epi8(TMP[16], M128(endiannes)); - TMP[15] = _mm_shuffle_epi8(TMP[15], M128(endiannes)); + TMP[16] = _mm_shuffle_epi8(TMP[16], M128(endianness)); + TMP[15] = _mm_shuffle_epi8(TMP[15], M128(endianness)); TMP[17] = _mm_andnot_si128(TMP[16], TMP[15]); 
for(n=0; n=_IPP_W7) || (_IPP32E>=_IPP32E_M7)) -__INLINE void PurgeBlock(void* pDst, int len) +__IPPCP_INLINE void PurgeBlock(void* pDst, int len) { int n; for(n=0; n> (blkBitSize -numSize)%8 ); @@ -193,7 +193,7 @@ __INLINE void StdIncrement(Ipp8u* pCounter, int blkBitSize, int numSize) } /* vb */ -__INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -247,7 +247,7 @@ __INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, /* vb */ -__INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -342,7 +342,7 @@ __INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, #if 0 /* vb */ -__INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -468,7 +468,7 @@ __INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, #if 0 /* vb */ -__INLINE void ompStdIncrement256( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement256( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; diff --git a/sources/ippcp/pcpver.h b/sources/ippcp/pcpver.h index 0f3dc9c1..5ed5979d 100644 --- a/sources/ippcp/pcpver.h +++ b/sources/ippcp/pcpver.h @@ -26,5 +26,6 @@ #include "ippver.h" #define BUILD() 1043 #define VERSION() BASE_VERSION(),BUILD() +#define STR_FILE_VERSION() STR_BASE_VERSION() "," STR(BUILD()) /* ////////////////////////// End of file "pcpver.h" ///////////////////////// */ diff --git a/sources/ippcp/sm2/ifma_arith_nsm2.c b/sources/ippcp/sm2/ifma_arith_nsm2.c index be0fa375..d6ba6a63 100644 --- a/sources/ippcp/sm2/ifma_arith_nsm2.c +++ b/sources/ippcp/sm2/ifma_arith_nsm2.c @@ -183,12 +183,12 @@ 
IPP_OWN_DEFN(fesm2, fesm2_from_mont_norder, (const fesm2 a)) { return r; } -__INLINE fesm2 mul_norder_norm(const fesm2 a, const fesm2 b) { +__IPPCP_INLINE fesm2 mul_norder_norm(const fesm2 a, const fesm2 b) { const fesm2 r = fesm2_mul_norder(a, b); return ifma_lnorm52(r); } -__INLINE fesm2 sqr_norder_norm(const fesm2 a) { +__IPPCP_INLINE fesm2 sqr_norder_norm(const fesm2 a) { const fesm2 r = fesm2_mul_norder(a, a); return ifma_lnorm52(r); } diff --git a/sources/ippcp/sm2/ifma_arith_psm2.c b/sources/ippcp/sm2/ifma_arith_psm2.c index 127da410..84950a63 100644 --- a/sources/ippcp/sm2/ifma_arith_psm2.c +++ b/sources/ippcp/sm2/ifma_arith_psm2.c @@ -246,12 +246,12 @@ IPP_OWN_DEFN(fesm2, fesm2_from_mont, (const fesm2 a)) { return r; } -__INLINE fesm2 fesm2_mul_norm(const fesm2 a, const fesm2 b) { +__IPPCP_INLINE fesm2 fesm2_mul_norm(const fesm2 a, const fesm2 b) { fesm2 r = fesm2_mul(a, b); return ifma_lnorm52(r); } -__INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { +__IPPCP_INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { fesm2 r = fesm2_sqr(a); return ifma_lnorm52(r); } @@ -262,7 +262,7 @@ __INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { fesm2_mul_dual(&(R1), (A1), (B1), &(R2), (A2), (B2)); \ ifma_lnorm52_dual(&(R1), (R1), &(R2), (R2)); -__INLINE fesm2 fesm2_sqr_ntimes(const fesm2 a, int n) { +__IPPCP_INLINE fesm2 fesm2_sqr_ntimes(const fesm2 a, int n) { fesm2 r = a; for (; n > 0; --n) sqr(r, r); diff --git a/sources/ippcp/sm2/ifma_arith_psm2.h b/sources/ippcp/sm2/ifma_arith_psm2.h index 18dde980..f84e43a9 100644 --- a/sources/ippcp/sm2/ifma_arith_psm2.h +++ b/sources/ippcp/sm2/ifma_arith_psm2.h @@ -62,7 +62,7 @@ IPP_OWN_DECL(fesm2, fesm2_mul, (const fesm2 a, const fesm2 b)) * \param[in] a value (in radix 2^52) * \return fesm2 not normalization value */ -__INLINE IPP_OWN_DEFN(fesm2, fesm2_sqr, (const fesm2 a)) { +__IPPCP_INLINE IPP_OWN_DEFN(fesm2, fesm2_sqr, (const fesm2 a)) { return fesm2_mul(a, a); } @@ -98,7 +98,7 @@ IPP_OWN_DECL(void, fesm2_mul_dual, (fesm2 pr1[], const 
fesm2 a1, const fesm2 b1, * \param[out] pr2 ptr second value no normalization * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, fesm2_sqr_dual, (fesm2 pr1[], const fesm2 a1, fesm2 pr2[], const fesm2 a2)) { +__IPPCP_INLINE IPP_OWN_DEFN(void, fesm2_sqr_dual, (fesm2 pr1[], const fesm2 a1, fesm2 pr2[], const fesm2 a2)) { fesm2_mul_dual(pr1, a1, a1, pr2, a2, a2); return; } diff --git a/sources/ippcp/sm2/ifma_defs_sm2.h b/sources/ippcp/sm2/ifma_defs_sm2.h index 3b7c29eb..a469d28d 100644 --- a/sources/ippcp/sm2/ifma_defs_sm2.h +++ b/sources/ippcp/sm2/ifma_defs_sm2.h @@ -50,7 +50,7 @@ static const __ALIGN64 Ipp64u PSM2_R[PSM2_LEN52] = { * 0xFF - is equal one * 0x00 - is no equal one */ -__INLINE mask8 sm2_is_msb(const mask8 a) { +__IPPCP_INLINE mask8 sm2_is_msb(const mask8 a) { return (mask8)((mask8)0 - (a >> 7)); } @@ -62,7 +62,7 @@ __INLINE mask8 sm2_is_msb(const mask8 a) { * 0xFF - is zero value * 0x00 - no equal zero */ -__INLINE mask8 sm2_is_zero_i64(const m512 a) { +__IPPCP_INLINE mask8 sm2_is_zero_i64(const m512 a) { const mask8 mask = cmp_i64_mask(a, setzero_i64(), _MM_CMPINT_NE); return sm2_is_msb((~mask & (mask - 1))); } diff --git a/sources/ippcp/sm2/ifma_ecpoint_sm2.c b/sources/ippcp/sm2/ifma_ecpoint_sm2.c index c809c767..f4048113 100644 --- a/sources/ippcp/sm2/ifma_ecpoint_sm2.c +++ b/sources/ippcp/sm2/ifma_ecpoint_sm2.c @@ -38,7 +38,7 @@ static const __ALIGN64 Ipp64u psm2_x8[PSM2_LEN52] = { 0x000ffffffffffff8, 0x000f800000007fff, 0x000fffffffffffff, 0x000fffffffffffff, 0x0007fffffff7ffff}; /* Mont(a) = a*r mod psm2, where r = 2^(6*52) mod psm2 */ -static const __ALIGN64 Ipp64u psm2_a[PSM2_LEN52] = { +static const __ALIGN64 Ipp64u psm2_a[PSM2_LEN52] = { 0x000ffffffcffffff, 0x000ff03000000fcf, 0x000cffffffffffff, 0x000fffffffffffff, 0x0000fcfffffeffff}; /* Mont(b) = b*r mod psm2, where r = 2^(6*52) mod psm2 */ @@ -502,7 +502,7 @@ static __NOINLINE void clear_secret_context(Ipp16u* wval, return; } -__INLINE mask8 is_eq_mask(const Ipp32s a, 
const Ipp32s b) { +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); const Ipp32s msb = 0 - (v >> (sizeof(a) * 8 - 1)); @@ -649,7 +649,7 @@ IPP_OWN_DEFN(void, gesm2_mul, (PSM2_POINT_IFMA * r, const PSM2_POINT_IFMA* p, co #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(PSM2_AFFINE_POINT_IFMA* r, +__IPPCP_INLINE void extract_point_affine(PSM2_AFFINE_POINT_IFMA* r, const SINGLE_PSM2_AFFINE_POINT_IFMA* tbl, const Ipp32s digit) { const Ipp32s idx = digit - 1; diff --git a/sources/ippcp/sm2/ifma_ecpoint_sm2.h b/sources/ippcp/sm2/ifma_ecpoint_sm2.h index 2a9dab66..8f325659 100644 --- a/sources/ippcp/sm2/ifma_ecpoint_sm2.h +++ b/sources/ippcp/sm2/ifma_ecpoint_sm2.h @@ -134,7 +134,7 @@ IPP_OWN_DECL(void, gesm2_select_ap_w7_ifma, (BNU_CHUNK_T * pAffinePoint, const B #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, +__IPPCP_INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, const BNU_CHUNK_T* pP, BNU_CHUNK_T* pPool, ifmaArithMethod* method, @@ -161,7 +161,7 @@ __INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint* pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint* pR, PSM2_POINT_IFMA* pP, BNU_CHUNK_T* pPool, ifmaArithMethod* method, diff --git a/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c b/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c index 466c57a6..a8f03c28 100644 --- a/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c +++ b/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c @@ -26,7 +26,7 @@ /* clang-format off */ -__INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, +__IPPCP_INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, const BNU_CHUNK_T *x, const BNU_CHUNK_T *y, ifmaArithMethod *method) /* clang-format on */ @@ 
-45,7 +45,7 @@ __INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, } /* clang-format off */ -__INLINE void ifma_sm2_get_affine(BNU_CHUNK_T *x, BNU_CHUNK_T *y, +__IPPCP_INLINE void ifma_sm2_get_affine(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const PSM2_POINT_IFMA* p, ifmaArithMethod* method) /* clang-format on */ @@ -115,7 +115,7 @@ IPP_OWN_DEFN(IppStatus, gfec_key_exchange_sm2_shared_key_avx512, (Ipp8u* pShared cpEcGFpReleasePool(1, pEC); IPP_BADARG_RET(!result, ippStsEphemeralKeyErr); - /* create buffer data (it needes further use compute tmp_p) + /* create buffer data (it needs further use compute tmp_p) * -> SM3( x(u/v)(0) || Za(1) || Zb(2) || xa(3) || ya(4) || xb(5) || yb(6) ) */ BNU_CHUNK_T *pDataBuff = cpGFpGetPool(7, pME); diff --git a/sources/ippcp/sm2/sm2_key_exchange_shared_key.c b/sources/ippcp/sm2/sm2_key_exchange_shared_key.c index 380cb649..a06952e9 100644 --- a/sources/ippcp/sm2/sm2_key_exchange_shared_key.c +++ b/sources/ippcp/sm2/sm2_key_exchange_shared_key.c @@ -57,7 +57,7 @@ * ippStsRangeErr - if BitSize(pEC) < IPP_SM3_DIGEST_BITSIZE * ippStsBadArgErr - if role(pKE) no equal ippKESM2Requester|ippKESM2Responder or sharedKeySize <= 0 * ippStsInvalidPrivateKey - if test is failed 0 < pPrvKey|pEphPrvKey < Order - * ippStsEphemeralKeyErr - if test is failed pEphPrvKey == pEphPublicKeySelf*G or if calculated U(V) is an + * ippStsEphemeralKeyErr - if test is failed pEphPrvKey == pEphPublicKeySelf*G or if calculated U(V) is an * infinity point, U/V = [h*t(a/b)]( P(b/a) + [x(b/a)`]R(b/a) ) = ( x(u/v), y(u/v) ) */ /* clang-format off */ @@ -148,7 +148,7 @@ IPPFUN(IppStatus, ippsGFpECKeyExchangeSM2_SharedKey, (Ipp8u* pSharedKey, int sha const int elemBytes = (elemBits + 7) / 8; /* size Bytes */ const int elemSize = GFP_FELEN(pME); /* size BNU_CHUNK */ - /* create buffer data (it needes further use compute tmp_p) + /* create buffer data (it needs further use compute tmp_p) * -> SM3( x(u/v)(0) || Za(1) || Zb(2) || xa(3) || ya(4) || xb(5) || yb(6) ) */ 
BNU_CHUNK_T *pDataBuff = cpGFpGetPool(7, pME); diff --git a/sources/ippcp/sm2/sm2_stuff.c b/sources/ippcp/sm2/sm2_stuff.c index 8eb5fc04..0da31f65 100644 --- a/sources/ippcp/sm2/sm2_stuff.c +++ b/sources/ippcp/sm2/sm2_stuff.c @@ -51,7 +51,7 @@ IPP_OWN_DEFN(IppStatus, computeZa_user_id_hash_sm2, (Ipp8u * pZa_digest, IPP_BAD_PTR2_RET(pZa_digest, p_user_id); /* check border (user_id_len > 0) | (elem_len > 0) */ IPP_BADARG_RET(!(user_id_len > 0) || !(elem_len > 0), ippStsBadArgErr); - /* check (user_id_len*8 <= 0xFFFF) ~ (user_id_len <= 0x1FFF) for two bytes overflow. + /* check (user_id_len*8 <= 0xFFFF) ~ (user_id_len <= 0x1FFF) for two bytes overflow. user_id_len*8 operation will be executed in algorithm's flow */ IPP_BADARG_RET(user_id_len > 0x1FFF, ippStsBadArgErr); /* param curve: a, b, Gx, Gy */ @@ -97,7 +97,7 @@ IPP_OWN_DEFN(IppStatus, computeZa_user_id_hash_sm2, (Ipp8u * pZa_digest, #define SIZE_CT (4) -__INLINE void convert_ct_to_big_endian(Ipp8u pCt[SIZE_CT], const Ipp32u ct) +__IPPCP_INLINE void convert_ct_to_big_endian(Ipp8u pCt[SIZE_CT], const Ipp32u ct) { pCt[0] = (Ipp8u)(ct >> 24); pCt[1] = (Ipp8u)(ct >> 16); diff --git a/sources/ippcp/sm2/sm2_stuff.h b/sources/ippcp/sm2/sm2_stuff.h index f0ab23ad..ccef5ed6 100644 --- a/sources/ippcp/sm2/sm2_stuff.h +++ b/sources/ippcp/sm2/sm2_stuff.h @@ -46,7 +46,7 @@ * @param[in out] arr array data * @param[in] len length array */ -__INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) +__IPPCP_INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) { #define SWAPXOR(x, y) \ (x) ^= (y); \ @@ -68,7 +68,7 @@ __INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) * @param[in] p point copy * @param[in] pEC context Elliptic Curve */ -__INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const IppsGFpECPoint *p, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const IppsGFpECPoint *p, const IppsGFpECState *pEC) { 
ECP_POINT_SET_ID(r); cpEcGFpInitPoint(r, data, ECP_POINT_FLAGS(p), pEC); @@ -86,7 +86,7 @@ __INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const * @param[in] a value x * @param[in] pEC context Elliptic Curve */ -__INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const IppsGFpECState *pEC) { const gsModEngine *pME = GFP_PMA(ECP_GFP(pEC)); @@ -107,7 +107,7 @@ __INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const } /* clang-format off */ -__INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, +__IPPCP_INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECPoint *p, IppsGFpECState *pEC) /* clang-format on */ @@ -120,7 +120,7 @@ __INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, return; } -__INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECState *pEC) { const gsModEngine *pME = GFP_PMA(ECP_GFP(pEC)); @@ -139,7 +139,7 @@ __INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECSt * @param[in] a hashing an array data * @param[in] numBytes number of bytes */ -__INLINE void cpSM2KE_compute_hash_SM3(Ipp8u *r, const Ipp8u *a, const int numBytes) +__IPPCP_INLINE void cpSM2KE_compute_hash_SM3(Ipp8u *r, const Ipp8u *a, const int numBytes) { static IppsHashState_rmf ctx; diff --git a/tools/ipp_custom_library_tool_python/gui/settings_panel.py b/tools/ipp_custom_library_tool_python/gui/settings_panel.py index 5ec6f2fa..2146e797 100644 --- a/tools/ipp_custom_library_tool_python/gui/settings_panel.py +++ b/tools/ipp_custom_library_tool_python/gui/settings_panel.py @@ -189,4 +189,4 @@ def disable_widgets(self): def get_formatted_button_name(self, button): button_name = 
button.text().replace('(R)', '') - return re.sub('[^\w-]', '', button_name.lower()) + return re.sub(r'[^\w-]', '', button_name.lower()) diff --git a/tools/ipp_custom_library_tool_python/tool/utils.py b/tools/ipp_custom_library_tool_python/tool/utils.py index 18ad9458..4067b79b 100644 --- a/tools/ipp_custom_library_tool_python/tool/utils.py +++ b/tools/ipp_custom_library_tool_python/tool/utils.py @@ -27,16 +27,16 @@ OPENMP = 'openmp' TL_TYPES = [TBB, OPENMP] -PATH_TO_PACKAGE_REGEX = '(?P.*)\Wtools\W.*' -COMPONENTS_INSTALL_DIR_REGEX = '(?P.*)\Wipp.*' -VERSION_REGEX = '.*VERSION_STR\s*(?P.*)\s*' -STR_MACROS_REGEX = '.*STR\((?P\S*)\).*' -C_STRING_REGEX = '.*(\S|^)(?P\s*".*"\s*)(\S|$).*' -C_STRING_VALUE_REGEX = '.*"(?P.*)".*' -FUNCTION_NAME_REGEX = 'IPPAPI\s*\(\s*(?P.*?)\s*,' \ - '\s*(?P\S*)\s*,' \ - '\s*\(?(?P.*?)\s*\)?\s*\)?\s*$' -ARGUMENT_REGEX = '.*\W*\w+\W*\s+\W*(?P[^\W\d]+\w*)\W*?' +PATH_TO_PACKAGE_REGEX = r'(?P.*)\Wtools\W.*' +COMPONENTS_INSTALL_DIR_REGEX = r'(?P.*)\Wipp.*' +VERSION_REGEX = r'.*VERSION_STR\s*(?P.*)\s*' +STR_MACROS_REGEX = r'.*STR\((?P\S*)\).*' +C_STRING_REGEX = r'.*(\S|^)(?P\s*".*"\s*)(\S|$).*' +C_STRING_VALUE_REGEX = r'.*"(?P.*)".*' +FUNCTION_NAME_REGEX = r'IPPAPI\s*\(\s*(?P.*?)\s*,' \ + r'\s*(?P\S*)\s*,' \ + r'\s*\(?(?P.*?)\s*\)?\s*\)?\s*$' +ARGUMENT_REGEX = r'.*\W*\w+\W*\s+\W*(?P[^\W\d]+\w*)\W*?' CUSTOM_LIBRARY_NAME = 'Custom library name' BUILD_SCRIPT_NAME = 'Build script name'