diff --git a/BUILD.md b/BUILD.md index 9012656a..1f501fa7 100644 --- a/BUILD.md +++ b/BUILD.md @@ -35,6 +35,7 @@ - GCC 11.4 - Clang 9.0 - Clang 12.0 +- Clang 16.0 - GNU binutils 2.32 ### Windows* OS - [Common tools](#common-tools) @@ -212,10 +213,10 @@ To build the Intel IPP Cryptography library on macOS\*, complete the following s - `-DPLATFORM_LIST=""` - optional, works only if `-DMERGED_BLD:BOOL=off` is set. Sets target platforms for the code to be compiled. See the supported platforms list [here](./OVERVIEW.md). - Example for Linux\* OS and the IA-32 architecture: - `-DPLATFORM_LIST="m7;s8;p8;g9;h9"` + `-DPLATFORM_LIST="w7;s8;p8;g9;h9"` - Example for Linux\* OS and the Intel® 64 architecture: - `-DPLATFORM_LIST="w7;n8;y8;e9;l9;k0"` + `-DPLATFORM_LIST="m7;n8;y8;e9;l9;k0;k1"` - `-DNO_CRYPTO_MB:BOOL=TRUE` - optional, turns off the build of [Crypto Multi Buffer library](./sources/ippcp/crypto_mb/Readme.md) and, as a consequence, removes all dependencies on OpenSSL library. - `-DBABASSL:BOOL=on`, `-DBORINGSSL:BOOL=on` - required only if forks of OpenSSL library are used to resolve OpenSSL dependencies - Tongsuo and BoringSSL respectively. These flags make sense when [Crypto Multi Buffer library](./sources/ippcp/crypto_mb/Readme.md) is built. - `-DIPPCP_CUSTOM_BUILD=""` - optional, works only if `-DMERGED_BLD:BOOL=off` is set, i.e. only for 1CPU libraries. Enables the CPU feature dispatching mask at compile-time based on the provided list. diff --git a/CHANGELOG.md b/CHANGELOG.md index f23fda35..d60c9cad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ This is a list of notable changes to Intel(R) IPP Cryptography, in reverse chronological order. +## Intel(R) IPP Cryptography 2021.12 +- Added single-buffer implementation of Leighton-Micali Hash-Based Signatures(LMS) algorithm, verification part. +- Added support of Clang 16.0 compiler for Linux. +- Added examples of AES-GCM Encryption/Decryption usage. 
+- AES-GCM algorithm with Intel® Advanced Vector Extensions 2 (Intel® AVX2) vector extensions of Intel® AES New Instructions (Intel® AES-NI) was optimized. + ## Intel(R) IPP Cryptography 2021.11 - Minimal supported BoringSSL version was increased to [45cf810d](https://github.com/google/boringssl/archive/45cf810dbdbd767f09f8cb0b0fcccd342c39041f.tar.gz) tag. diff --git a/LICENSE b/LICENSE index c7047e16..ec78a4e5 100644 --- a/LICENSE +++ b/LICENSE @@ -173,62 +173,4 @@ incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - CMake - ------------------------------ - CMake - Cross Platform Makefile Generator - Copyright 2000-2021 Kitware, Inc. and Contributors - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Kitware, Inc. nor the names of Contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file +END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md index 1ac624a6..bda55102 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ The library provides a comprehensive set of routines commonly used for cryptogra - Finite Field Arithmetic Functions - Big Number Integer Arithmetic Functions - PRNG/TRNG and Prime Numbers Generation +- Hash-based signature algorithms ## Reasons to Use Intel IPP Cryptography - Security (constant-time execution for secret processing functions) diff --git a/README_FIPS.md b/README_FIPS.md index 29af90c3..ca16a29b 100644 --- a/README_FIPS.md +++ b/README_FIPS.md @@ -25,7 +25,7 @@ In general, software may be certified at up to level 2. Intel® Integrated Performance Primitives Cryptography (Intel(R) IPP Cryptography) provides building blocks of FIPS-mode API (such as self-tests, FIPS-approved -functionality status query) which can help the end users to fullfill FIPS level 1 requirements. +functionality status query) which can help the end users to fulfill FIPS level 1 requirements. Please, refer to [Covered Algorithms](#covered-algorithms) section for the full list of FIPS-Approved API which are covered with the selftests. @@ -41,7 +41,7 @@ Intel(R) IPP Cryptography may be built in FIPS-mode with IPPCP_FIPS_MODE=on configuration for ippcp and MBX_FIPS_MODE=on for crypto_MB (see details in [Build section](#build)). Application, which uses Intel(R) IPP Cryptography may be **FIPS-Certified** by -matching FIPS 140 requirement and obtaining NIST sertificate or also be **FIPS-Compliant** for their own customers. +matching FIPS 140 requirement and obtaining NIST certificate or also be **FIPS-Compliant** for their own customers. Please, refer to [Level 1 Specific Requirements](#level-1-specific-requirements) for the detailed description of what is done on Intel(R) IPP Cryptography-side @@ -59,7 +59,7 @@ and what should be done by a more high-level application. 
| 6 | Run pairwise consistency selftest for newly generated RSA/ECC keypair | Intel(R) IPP Cryptography provides [fips_selftest_ippcp API](#covered-algorithms) to run selftests | | 7 | Module to guarantee uniqueness of GSM key + IV | **User's application effort required** | | 8 | Module to guarantee XTS key1 != key2 | Intel(R) IPP Cryptography-side check | -| 9 | (non-production) Extract raw noise source output samples of RBG for quality analysis | DBRNG is currenty out of the cryptography boundary | +| 9 | (non-production) Extract raw noise source output samples of RBG for quality analysis | DBRNG is currently out of the cryptography boundary | | 10| (non-production) Run crypto algorithm testing with NIST-generated vectors | Done offline by Intel(R) IPP Cryptography for the [covered algorithms](#covered-algorithms) | For the implementation details about the steps in [Level 1 Specific Requirements](#level-1-specific-requirements) @@ -107,7 +107,7 @@ Configuration example for ippcp with Intel® C++ Compiler: `CC=icc CXX=icpc cmake CMakeLists.txt -B_build -DARCH=intel64 -DIPPCP_FIPS_MODE:BOOL=on[-DIPPCP_SELFTEST_USE_MALLOC:BOOL=on]` -> Note: selftests with intenal memory allocation uses malloc, which introduces +> Note: selftests with internal memory allocation uses malloc, which introduces a c runtime dependency. To avoid the dependency, use IPPCP_SELFTEST_USE_MALLOC:BOOL=off or do not specify it as this as the default. In this case, all self-tests will require external memory allocation. @@ -186,7 +186,7 @@ mbx_nistp256_ecdh_mb8(sharedBA, prvB, pubAx, pubAy, pubAz_curr, 0); #### Intel(R) IPP Cryptography Each API from the list is covered with the selftest fips_selftest_ipps -availible in Intel(R) IPP Cryptography build in FIPS mode. +available in Intel(R) IPP Cryptography build in FIPS mode. 
##### AES diff --git a/data/images/README_FIPS-pictures-0-ippcp_architecture.png b/data/images/README_FIPS-pictures-0-ippcp_architecture.png index ef9381a9..b4c5f563 100644 Binary files a/data/images/README_FIPS-pictures-0-ippcp_architecture.png and b/data/images/README_FIPS-pictures-0-ippcp_architecture.png differ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3de76439..adeccbc3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,9 +20,12 @@ # List of examples for targets generation set(IPPCP_EXAMPLES - # AES examples + # AES-CTR examples aes/aes-256-ctr-encryption.cpp aes/aes-256-ctr-decryption.cpp + # AES-GCM examples + aes/aes-128-gcm-encryption.cpp + aes/aes-128-gcm-decryption.cpp # DSA dsa/dsa-dlp-sha-1-verification.cpp dsa/dsa-dlp-sha-256-verification.cpp diff --git a/examples/aes/aes-128-gcm-decryption.cpp b/examples/aes/aes-128-gcm-decryption.cpp new file mode 100644 index 00000000..d57afec4 --- /dev/null +++ b/examples/aes/aes-128-gcm-decryption.cpp @@ -0,0 +1,172 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/*! + * + * \file + * + * \brief AES Galois Counter mode of operation (GCM) example + * + * This example demonstrates usage of AES block cipher with 128-bit key + * run with GCM mode of operation. 
Decryption scheme. + * + * The GCM mode of operation is implemented according to the + * "NIST Special Publication 800-38D: Recommendation for Block Cipher Modes of + * Operation: Galois/Counter Mode (GCM) and GMAC" document: + * + * https://csrc.nist.gov/pubs/sp/800/38/d/final + * + */ + +#include + +#include "ippcp.h" +#include "examples_common.h" + +/*! Key size in bytes */ +static const int KEY_SIZE = 16; + +/*! Message size in bytes */ +static const int MSG_LEN = 60; + +/*! Initialization vector size in bytes */ +static const int IV_LEN = 12; + +/*! Tag size in bytes */ +static const int TAG_LEN = 16; + +/*! Additional authenticated data size in bytes */ +static const int AAD_LEN = 20; + +/*! 128-bit secret key */ +static Ipp8u key128[KEY_SIZE] = { + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c, + 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08 +}; + +/*! Initialization vector */ +static const Ipp8u iv[IV_LEN] = { + 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad, + 0xde,0xca,0xf8,0x88 +}; + +/*! Plain text */ +static Ipp8u plainText[MSG_LEN] = { + 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5, + 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda, + 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53, + 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57, + 0xba,0x63,0x7b,0x39 +}; + +/*! Cipher text */ +static Ipp8u cipherText[MSG_LEN] = { + 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24, + 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0, + 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c, + 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97, + 0x3d,0x58,0xe0,0x91 +}; + +/*! Tag */ +static const Ipp8u tag[TAG_LEN] = { + 0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb, + 0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47 +}; + +/*! 
Additional authenticated data */ +static const Ipp8u aad[AAD_LEN] = { + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2 +}; + +/*! Main function */ +int main(void) +{ + /* Size of AES-GCM context structure. It will be set up in ippsAES_GCMGetSize(). */ + int AESGCMSize = 0; + + /* Output plain text */ + Ipp8u pOutPlainText[MSG_LEN] = {}; + /* Output tag */ + Ipp8u pOutTag[TAG_LEN] = {}; + + /* Pointer to AES-GCM context structure */ + IppsAES_GCMState* pAESGCMState = 0; + + /* Internal function status */ + IppStatus status = ippStsNoErr; + + do { + /* 1. Get size needed for AES-GCM context structure */ + status = ippsAES_GCMGetSize(&AESGCMSize); + if (!checkStatus("ippsAES_GCMGetSize", ippStsNoErr, status)) + return status; + + /* 2. Allocate memory for AES-GCM context structure */ + pAESGCMState = (IppsAES_GCMState*)(new Ipp8u[AESGCMSize]); + if (NULL == pAESGCMState) { + printf("ERROR: Cannot allocate memory (%d bytes) for AES-GCM state\n", AESGCMSize); + return -1; + } + + /* 3. Initialize AES-GCM context */ + status = ippsAES_GCMInit(key128, KEY_SIZE, pAESGCMState, AESGCMSize); + if (!checkStatus("ippsAES_GCMInit", ippStsNoErr, status)) + break; + + /* 4. Decryption setup */ + status = ippsAES_GCMStart(iv, IV_LEN, aad, AAD_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMStart", ippStsNoErr, status)) + break; + + /* 5.Decryption */ + status = ippsAES_GCMDecrypt(cipherText, pOutPlainText, MSG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMDecrypt", ippStsNoErr, status)) + break; + + /* 6. 
Get tag */ + status = ippsAES_GCMGetTag(pOutTag, TAG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMGetTag", ippStsNoErr, status)) + break; + + /* Compare output to known answer */ + if (0 != memcmp(pOutTag, tag, TAG_LEN)) { + printf("ERROR: Output tag and reference tag do not match\n"); + break; + } + if (0 != memcmp(pOutPlainText, plainText, MSG_LEN)) { + printf("ERROR: Decrypted and plain text do not match\n"); + break; + } + } while (0); + + /* 7. Remove secret and release resources */ + ippsAES_GCMReset(pAESGCMState); + if (pAESGCMState) + delete [] (Ipp8u*)pAESGCMState; + + PRINT_EXAMPLE_STATUS("ippsAES_GCMDecrypt", "AES-GCM 128 Decryption", !status) + + return status; +} diff --git a/examples/aes/aes-128-gcm-encryption.cpp b/examples/aes/aes-128-gcm-encryption.cpp new file mode 100644 index 00000000..102a50f6 --- /dev/null +++ b/examples/aes/aes-128-gcm-encryption.cpp @@ -0,0 +1,172 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/*! + * + * \file + * + * \brief AES Galois Counter mode of operation (GCM) example + * + * This example demonstrates usage of AES block cipher with 128-bit key + * run with GCM mode of operation. Encryption scheme. 
+ * + * The GCM mode of operation is implemented according to the + * "NIST Special Publication 800-38D: Recommendation for Block Cipher Modes of + * Operation: Galois/Counter Mode (GCM) and GMAC" document: + * + * https://csrc.nist.gov/pubs/sp/800/38/d/final + * + */ + +#include + +#include "ippcp.h" +#include "examples_common.h" + +/*! Key size in bytes */ +static const int KEY_SIZE = 16; + +/*! Message size in bytes */ +static const int MSG_LEN = 60; + +/*! Initialization vector size in bytes */ +static const int IV_LEN = 12; + +/*! Tag size in bytes */ +static const int TAG_LEN = 16; + +/*! Additional authenticated data size in bytes */ +static const int AAD_LEN = 20; + +/*! 128-bit secret key */ +static Ipp8u key128[KEY_SIZE] = { + 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c, + 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08 +}; + +/*! Initialization vector */ +static const Ipp8u iv[IV_LEN] = { + 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad, + 0xde,0xca,0xf8,0x88 +}; + +/*! Plain text */ +static Ipp8u plainText[MSG_LEN] = { + 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5, + 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, + 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda, + 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, + 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53, + 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, + 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57, + 0xba,0x63,0x7b,0x39 +}; + +/*! Cipher text */ +static Ipp8u cipherText[MSG_LEN] = { + 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24, + 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, + 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0, + 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, + 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c, + 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, + 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97, + 0x3d,0x58,0xe0,0x91 +}; + +/*! Tag */ +static const Ipp8u tag[TAG_LEN] = { + 0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb, + 0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47 +}; + +/*! 
Additional authenticated data */ +static const Ipp8u aad[AAD_LEN] = { + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, + 0xab,0xad,0xda,0xd2 +}; + +/*! Main function */ +int main(void) +{ + /* Size of AES-GCM context structure. It will be set up in ippsAES_GCMGetSize(). */ + int AESGCMSize = 0; + + /* Output cipher text */ + Ipp8u pOutCipherText[MSG_LEN] = {}; + /* Output tag */ + Ipp8u pOutTag[TAG_LEN] = {}; + + /* Pointer to AES-GCM context structure */ + IppsAES_GCMState* pAESGCMState = 0; + + /* Internal function status */ + IppStatus status = ippStsNoErr; + + do { + /* 1. Get size needed for AES-GCM context structure */ + status = ippsAES_GCMGetSize(&AESGCMSize); + if (!checkStatus("ippsAES_GCMGetSize", ippStsNoErr, status)) + return status; + + /* 2. Allocate memory for AES-GCM context structure */ + pAESGCMState = (IppsAES_GCMState*)(new Ipp8u[AESGCMSize]); + if (NULL == pAESGCMState) { + printf("ERROR: Cannot allocate memory (%d bytes) for AES-GCM state\n", AESGCMSize); + return -1; + } + + /* 3. Initialize AES-GCM context */ + status = ippsAES_GCMInit(key128, KEY_SIZE, pAESGCMState, AESGCMSize); + if (!checkStatus("ippsAES_GCMInit", ippStsNoErr, status)) + break; + + /* 4. Encryption setup */ + status = ippsAES_GCMStart(iv, IV_LEN, aad, AAD_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMStart", ippStsNoErr, status)) + break; + + /* 5. Encryption */ + status = ippsAES_GCMEncrypt(plainText, pOutCipherText, MSG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMEncrypt", ippStsNoErr, status)) + break; + + /* 6. 
Get tag */ + status = ippsAES_GCMGetTag(pOutTag, TAG_LEN, pAESGCMState); + if (!checkStatus("ippsAES_GCMGetTag", ippStsNoErr, status)) + break; + + /* Compare output to known answer */ + if (0 != memcmp(pOutTag, tag, TAG_LEN)) { + printf("ERROR: Output tag and reference tag do not match\n"); + break; + } + if (0 != memcmp(pOutCipherText, cipherText, MSG_LEN)) { + printf("ERROR: Encrypted and reference messages do not match\n"); + break; + } + } while (0); + + /* 7. Remove secret and release resources */ + ippsAES_GCMReset(pAESGCMState); + if (pAESGCMState) + delete [] (Ipp8u*)pAESGCMState; + + PRINT_EXAMPLE_STATUS("ippsAES_GCMEncrypt", "AES-GCM 128 Encryption", !status) + + return status; +} diff --git a/examples/examplesBuildOptions.cmake b/examples/examplesBuildOptions.cmake index ac8dfa96..e3d830af 100644 --- a/examples/examplesBuildOptions.cmake +++ b/examples/examplesBuildOptions.cmake @@ -60,9 +60,12 @@ if(UNIX) set(LINK_FLAG_S_ST_LINUX "-Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now") if(NOT NONPIC_LIB) ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-fpie") + ippcp_extend_variable(CMAKE_CXX_FLAGS "-fpie -fPIE") + else() + ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-no-pie") endif() - ippcp_extend_variable(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security -fpie -fPIE") + ippcp_extend_variable(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security") if(${ARCH} MATCHES "ia32") ippcp_extend_variable(LINK_FLAG_S_ST_LINUX "-m32") diff --git a/examples/utils/bignum.h b/examples/utils/bignum.h index 9c070afc..2ef32680 100644 --- a/examples/utils/bignum.h +++ b/examples/utils/bignum.h @@ -42,7 +42,7 @@ class BigNumber friend IppsBigNumState* BN(const BigNumber& bn) {return bn.m_pBN;} operator IppsBigNumState* () const { return m_pBN; } - // some useful constatns + // some useful constants static const BigNumber& Zero(); static const BigNumber& One(); static const BigNumber& Two(); diff --git a/include/ippcp.h b/include/ippcp.h index 
b3a0e670..f5cb74eb 100644 --- a/include/ippcp.h +++ b/include/ippcp.h @@ -1555,6 +1555,23 @@ IPPAPI(IppStatus, ippsXMSSVerify, (const Ipp8u* pMsg, #endif // IPPCP_PREVIEW_XMSS +#ifdef IPPCP_PREVIEW_LMS + +IPPAPI(IppStatus, ippsLMSBufferGetSize, (Ipp32s* pSize, Ipp32s maxMessageLength, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSSignatureStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSPublicKeyStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +IPPAPI(IppStatus, ippsLMSSetPublicKeyState, (const IppsLMSAlgoType lmsType, const Ipp8u* pI, const Ipp8u* pK, + IppsLMSPublicKeyState* pState)) +IPPAPI(IppStatus, ippsLMSSetSignatureState, (const IppsLMSAlgoType lmsType, Ipp32u q, const Ipp8u* pC, + const Ipp8u* pY, const Ipp8u* pAuthPath, + IppsLMSSignatureState* pState)) +IPPAPI(IppStatus, ippsLMSVerify, (const Ipp8u* pMsg, const Ipp32s msgLen, + const IppsLMSSignatureState* pSign, + int* pIsSignValid, + const IppsLMSPublicKeyState* pKey, + Ipp8u* pBuffer)) +#endif // IPPCP_PREVIEW_LMS + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER) #pragma warning(pop) #endif diff --git a/include/ippcp/fips_cert.h b/include/ippcp/fips_cert.h index 56d2bc87..4feef86c 100644 --- a/include/ippcp/fips_cert.h +++ b/include/ippcp/fips_cert.h @@ -222,7 +222,8 @@ enum FIPS_IPPCP_FUNC { GFpECESEncrypt_SM2, GFpECESDecrypt_SM2, GFpECESFinal_SM2, - XMSSVerify + XMSSVerify, + LMSVerify }; /** diff --git a/include/ippcpdefs.h b/include/ippcpdefs.h index f57f4a2a..a1c41558 100644 --- a/include/ippcpdefs.h +++ b/include/ippcpdefs.h @@ -853,28 +853,84 @@ IPPAPI( const char*, ippcpGetStatusString, ( IppStatus StsCode )) IPPAPI( int, ippcpGetEnabledNumThreads, ( void ) ) IPPAPI( Ipp64u, ippcpGetCpuClocks, (void) ) -#ifdef IPPCP_PREVIEW_XMSS +/* Defines related to experimental features enabling */ +#ifdef IPPCP_PREVIEW_ALL + #ifndef IPPCP_PREVIEW_XMSS + #define IPPCP_PREVIEW_XMSS (1) + #endif + #ifndef 
IPPCP_PREVIEW_LMS + #define IPPCP_PREVIEW_LMS (1) + #endif +#endif + /* // ========================================================= // XMSS Algo // ========================================================= */ +#ifdef IPPCP_PREVIEW_XMSS + typedef enum + { + reserved = 0, + XMSS_SHA2_10_256 = 1, + XMSS_SHA2_16_256 = 2, + XMSS_SHA2_20_256 = 3, + XMSS_SHA2_10_512 = 4, + XMSS_SHA2_16_512 = 5, + XMSS_SHA2_20_512 = 6 + } IppsXMSSAlgo; + + typedef struct _cpXMSSSignatureState IppsXMSSSignatureState; + typedef struct _cpXMSSPublicKeyState IppsXMSSPublicKeyState; -typedef enum -{ - reserved = 0, - XMSS_SHA2_10_256 = 1, - XMSS_SHA2_16_256 = 2, - XMSS_SHA2_20_256 = 3, - XMSS_SHA2_10_512 = 4, - XMSS_SHA2_16_512 = 5, - XMSS_SHA2_20_512 = 6 -} IppsXMSSAlgo; +#endif // IPPCP_PREVIEW_XMSS -typedef struct _cpXMSSSignatureState IppsXMSSSignatureState; -typedef struct _cpXMSSPublicKeyState IppsXMSSPublicKeyState; -#endif // IPPCP_PREVIEW_XMSS +/* +// ========================================================= +// LMS Algo +// ========================================================= +*/ +#ifdef IPPCP_PREVIEW_LMS + /* Parameters set is based on two articles: + * RFC8554 (https://datatracker.ietf.org/doc/html/rfc8554) + * https://datatracker.ietf.org/doc/html/draft-fluhrer-lms-more-parm-sets-00 + */ + typedef enum + { + LMOTS_SHA256_N32_W1 = 1, + LMOTS_SHA256_N32_W2 = 2, + LMOTS_SHA256_N32_W4 = 3, + LMOTS_SHA256_N32_W8 = 4, + LMOTS_SHA256_N24_W1 = 5, + LMOTS_SHA256_N24_W2 = 6, + LMOTS_SHA256_N24_W4 = 7, + LMOTS_SHA256_N24_W8 = 8 + } IppsLMOTSAlgo; + + typedef enum + { + LMS_SHA256_M32_H5 = 5, + LMS_SHA256_M32_H10 = 6, + LMS_SHA256_M32_H15 = 7, + LMS_SHA256_M32_H20 = 8, + LMS_SHA256_M32_H25 = 9, + LMS_SHA256_M24_H5 = 10, + LMS_SHA256_M24_H10 = 11, + LMS_SHA256_M24_H15 = 12, + LMS_SHA256_M24_H20 = 13, + LMS_SHA256_M24_H25 = 14 + } IppsLMSAlgo; + + typedef struct { + IppsLMOTSAlgo lmotsOIDAlgo; + IppsLMSAlgo lmsOIDAlgo; + } IppsLMSAlgoType; + + typedef struct _cpLMSSignatureState 
IppsLMSSignatureState; + typedef struct _cpLMSPublicKeyState IppsLMSPublicKeyState; +#endif // IPPCP_PREVIEW_LMS + #ifdef __cplusplus } diff --git a/include/ippversion.h b/include/ippversion.h index 17851429..54bf8f9f 100644 --- a/include/ippversion.h +++ b/include/ippversion.h @@ -26,14 +26,14 @@ #if !defined( IPPVERSION_H__ ) #define IPPVERSION_H__ -#define IPP_VERSION_MAJOR 2021 -#define IPP_VERSION_MINOR 12 +#define IPP_VERSION_MAJOR 1 +#define IPP_VERSION_MINOR 0 #define IPP_VERSION_UPDATE 0 // Major interface version -#define IPP_INTERFACE_VERSION_MAJOR 11 +#define IPP_INTERFACE_VERSION_MAJOR 12 // Minor interface version -#define IPP_INTERFACE_VERSION_MINOR 14 +#define IPP_INTERFACE_VERSION_MINOR 0 #define IPP_VERSION_STR STR(IPP_VERSION_MAJOR) "." STR(IPP_VERSION_MINOR) "." STR(IPP_VERSION_UPDATE) " (" STR(IPP_INTERFACE_VERSION_MAJOR) "." STR(IPP_INTERFACE_VERSION_MINOR) " )" diff --git a/sources/cmake/windows/IntelLLVM2023.1.0.cmake b/sources/cmake/windows/IntelLLVM2023.1.0.cmake new file mode 100644 index 00000000..dd570edd --- /dev/null +++ b/sources/cmake/windows/IntelLLVM2023.1.0.cmake @@ -0,0 +1,107 @@ +#=============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== + +# +# Intel® Integrated Performance Primitives Cryptography (Intel® IPP Cryptography) +# + +# linker +set(LINK_FLAG_STATIC_WINDOWS "/ignore:4221") # ignore warnings about empty obj files +# Suppresses the display of the copyright banner when the compiler starts up and display of informational messages during compiling. +set(LINK_FLAG_DYNAMIC_WINDOWS "/nologo") +# Displays information about modules that are incompatible with safe structured exception handling when /SAFESEH isn't specified. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /VERBOSE:SAFESEH") +# Disable incremental linking +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /INCREMENTAL:NO") +# The /NODEFAULTLIB option tells the linker to remove one or more default libraries from the list of libraries it searches when resolving external references. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /NODEFAULTLIB") +# Indicates that an executable was tested to be compatible with the Windows Data Execution Prevention feature. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /NXCOMPAT") +# Specifies whether to generate an executable image that can be randomly rebased at load time. +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /DYNAMICBASE") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /CETCOMPAT") + +if(${ARCH} MATCHES "ia32") + # When /SAFESEH is specified, the linker will only produce an image if it can also produce a table of the image's safe exception handlers. + set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /SAFESEH") +else() + # The /LARGEADDRESSAWARE option tells the linker that the application can handle addresses larger than 2 gigabytes. 
+ set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /LARGEADDRESSAWARE") + # This option modifies the header of an executable image, a .dll file or .exe file, to indicate whether ASLR with 64-bit addresses is supported. + set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /HIGHENTROPYVA") +endif(${ARCH} MATCHES "ia32") + +# Disables linking to Intel® libraries +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /Qno-intel-lib") + +# Link to universal C runtime and MSVC runtime. Used in dlls. +set(LINK_LIB_STATIC_RELEASE libcmt libucrt libvcruntime) +set(LINK_LIB_STATIC_DEBUG libcmtd libucrtd libvcruntime) + +# compiler +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${LIBRARY_DEFINES}") + +# Suppresses the display of the copyright banner when the compiler starts up and display of informational messages during compiling. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /nologo") +# Warning level = 4 +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") +# Changes all warnings to errors. +#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX") +# Detects some buffer overruns that overwrite a function's return address, exception handler address, or certain types of parameters. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GS") +# Controls how the members of a structure are packed into memory and specifies the same packing for all structures in a module. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zp16") +# Allows the compiler to package individual functions in the form of packaged functions. Smaller resulting size. 
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Gy") +# C std +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcf-protection:full") +# Suppress some warnings +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Wno-missing-braces /Wno-null-pointer-arithmetic /Wno-unused-function /Wno-static-in-inline /Qno-intel-lib") + +# Causes the application to use the multithread, static version of the run-time library (debug version). +set(CMAKE_C_FLAGS_DEBUG "/MTd") +# The /Zi option produces a separate PDB file that contains all the symbolic debugging information for use with the debugger. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Zi") +# Turns off all optimizations in the program and speeds compilation. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Od") +# Debug macro +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /DDEBUG") + +# Causes the application to use the multithread, static version of the run-time library. +set(CMAKE_C_FLAGS_RELEASE "/MT") +# Omits the default C runtime library name from the .obj file. +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zl") +# "Maximize Speed". Selects a predefined set of options that affect the size and speed of generated code. 
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /O3") +# No-debug macro +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /DNDEBUG") + +set(w7_opt "${w7_opt} /arch:SSE2") +set(s8_opt "${s8_opt} /arch:SSSE3") +set(p8_opt "${p8_opt} /arch:SSE4.2 -maes -mpclmul -msha") +set(g9_opt "${g9_opt} /arch:AVX -maes -mpclmul -msha -mrdrnd -mrdseed") +set(h9_opt "${h9_opt} /arch:AVX2 -maes -mpclmul -msha -mrdrnd -mrdseed -mvaes -mvpclmulqdq") +set(m7_opt "${m7_opt} /arch:SSE3") +set(n8_opt "${n8_opt} /arch:SSSE3") +set(y8_opt "${y8_opt} /arch:SSE4.2 -maes -mpclmul -msha") +set(e9_opt "${e9_opt} /arch:AVX -maes -mpclmul -msha -mrdrnd -mrdseed") +set(l9_opt "${l9_opt} /arch:CORE-AVX2 -maes -mpclmul -msha -mrdrnd -mrdseed -mvaes -mvpclmulqdq") +set(n0_opt "${n0_opt} /arch:CORE-AVX2 -maes -mavx512f -mavx512cd -mavx512pf -mavx512er -mpclmul -msha -mrdrnd -mrdseed") +set(k0_opt "${k0_opt} /arch:SKYLAKE-AVX512 -maes -mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mpclmul -mrdrnd -mrdseed -madx") +set(k1_opt "${k1_opt} /arch:ICELAKE-SERVER -maes -mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mpclmul -msha -mrdrnd -mrdseed -madx -mgfni -mvaes -mvpclmulqdq -mavx512vbmi -mavx512vbmi2") diff --git a/sources/dispatcher/gen_disp_lin32.nonpic.py b/sources/dispatcher/gen_disp_lin32.nonpic.py index 457dceb4..a59b6d60 100644 --- a/sources/dispatcher/gen_disp_lin32.nonpic.py +++ b/sources/dispatcher/gen_disp_lin32.nonpic.py @@ -107,7 +107,7 @@ {FunName}: {endbr32} mov eax, dword [ippcpJumpIndexForMergedLibs] - jmp dword [rel arraddr_{FunName} + eax*4] + jmp dword [arraddr_{FunName} + eax*4] .LEnd{FunName}: """.format(FunName=FunName, size=size, endbr32='db 0xf3, 0x0f, 0x1e, 0xfb')) ASMDISP.close() diff --git a/sources/include/fips_cert_internal/bn_common.h b/sources/include/fips_cert_internal/bn_common.h index 6a36dfa8..a2e8dc63 100644 --- a/sources/include/fips_cert_internal/bn_common.h +++ b/sources/include/fips_cert_internal/bn_common.h @@ -31,9 +31,9 @@ * 
\param[in] sgn sign of big number * \param[in] pdata pointer to integer big number * \param[in] data_word_len length of integer big number in 32bit size - * + * */ -__INLINE IppStatus ippcp_init_set_bn(IppsBigNumState *pbn, int max_word_len, +__IPPCP_INLINE IppStatus ippcp_init_set_bn(IppsBigNumState *pbn, int max_word_len, IppsBigNumSGN sgn, const Ipp32u *pdata, int data_word_len) { IppStatus sts; diff --git a/sources/include/ippres.gen b/sources/include/ippres.gen index 0b145050..3ba40923 100644 --- a/sources/include/ippres.gen +++ b/sources/include/ippres.gen @@ -42,7 +42,7 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "CompanyName", "Intel Corporation.\0" - VALUE "FileVersion", STR( VERSION() ) "\0" + VALUE "FileVersion", STR_FILE_VERSION() "\0" VALUE "ProductName", IPP_LIB_SHORTNAME() ". Intel(R) Integrated Performance Primitives. " IPP_LIB_LONGNAME() ".\0" VALUE "ProductVersion", STR_VERSION() "\0" VALUE "LegalCopyright", "Copyright (C) 1999-2021, Intel Corporation. All rights reserved.\0" diff --git a/sources/include/ippver.h b/sources/include/ippver.h index 0b2f5ab8..52d1a278 100644 --- a/sources/include/ippver.h +++ b/sources/include/ippver.h @@ -30,6 +30,10 @@ #define STR2(x) #x #define STR(x) STR2(x) +#ifndef STR_BASE_VERSION +#define STR_BASE_VERSION() STR(IPP_VERSION_MAJOR) "," STR(IPP_VERSION_MINOR) ", " STR(IPP_VERSION_UPDATE) +#endif + #ifndef STR_VERSION #ifdef IPP_REVISION #define STR_VERSION() IPP_VERSION_STR " (r" STR( IPP_REVISION ) ")" diff --git a/sources/include/lms_internal/lmots.h b/sources/include/lms_internal/lmots.h new file mode 100644 index 00000000..bb3f480c --- /dev/null +++ b/sources/include/lms_internal/lmots.h @@ -0,0 +1,156 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#ifndef IPPCP_LMOTS_H_ +#define IPPCP_LMOTS_H_ + +#include "owndefs.h" +#include "pcptool.h" + +#include "stateful_sig_common/common.h" + +/* + * LMOTS algorithms params. "Table 1" LMS spec. + */ +typedef struct { + Ipp32u n; + Ipp32u w; + Ipp32u p; + Ipp32u ls; + IppsHashMethod* hash_method; +} cpLMOTSParams; + +/* + * Standard data format for LMOTS signature + * | 4 bytes || n bytes || n bytes || n bytes ||...|| n bytes | + * | otssigtype || C || Y[0] || Y[1] ||...|| Y[p-1] | + */ +typedef struct { + IppsLMOTSAlgo _lmotsOIDAlgo; + Ipp8u* pC; + Ipp8u* pY; +} _cpLMOTSSignatureState; + +/* + * Set LMOTS parameters + * + * Returns: Reason: + * ippStsBadArgErr lmotsOIDAlgo > Max value for IppsLMOTSAlgo + * lmotsOIDAlgo <= 0 + * ippStsNoErr no errors + * + * Input parameters: + * lmotsOIDAlgo id of LMOTS set of parameters + * + * Output parameters: + * params LMOTS parameters (w, p, ls, n, hash_method) + */ +__IPPCP_INLINE IppStatus setLMOTSParams(IppsLMOTSAlgo lmotsOIDAlgo, cpLMOTSParams* params) { + switch (lmotsOIDAlgo) { + case LMOTS_SHA256_N32_W1: { + params->w = 1; + params->p = 265; + params->ls = 7; + break; + } + case LMOTS_SHA256_N32_W2: { + params->w = 2; + params->p = 133; + params->ls = 6; + break; + } + case LMOTS_SHA256_N32_W4: { + params->w = 4; + params->p = 67; + params->ls = 4; + break; + } + case LMOTS_SHA256_N32_W8: { + params->w = 8; + params->p = 34; + params->ls = 0; + break; + } + case LMOTS_SHA256_N24_W1: { + params->w = 1; + params->p = 200; + params->ls = 
8; + break; + } + case LMOTS_SHA256_N24_W2: { + params->w = 2; + params->p = 101; + params->ls = 6; + break; + } + case LMOTS_SHA256_N24_W4 : { + params->w = 4; + params->p = 51; + params->ls = 4; + break; + } + case LMOTS_SHA256_N24_W8 : { + params->w = 8; + params->p = 26; + params->ls = 0; + break; + } + default: return ippStsBadArgErr; + } + params->hash_method = (IppsHashMethod*) ippsHashMethod_SHA256_TT(); + + if(lmotsOIDAlgo <= LMOTS_SHA256_N32_W8) { + params->n = 32; + } + else { + params->n = 24; + } + return ippStsNoErr; +} + +/* + * f(S, i, w) is the i-th, w-bit value, if S + * is interpreted as a sequence of w-bit values + * + * Input parameters: + * S a string to calculate coef + * i output element position + * w the length of the output element + * + * Output parameters: + * Target element of a specified length + * + */ +__IPPCP_INLINE Ipp32u cpCoef(Ipp8u* S, Ipp32u i, Ipp32u w) { + return ((1 << w) - 1) & ( S[(i * w) / 8] >> (8 - (w * (i % (8 / w)) + w))); +} + +__IPPCP_INLINE Ipp32u cpCksm(Ipp8u* S, cpLMOTSParams lmotsParams) { + Ipp32u w = lmotsParams.w; + Ipp32u n = lmotsParams.n; + Ipp32u ls = lmotsParams.ls; + + Ipp32u cksmQ = 0; //sum is a 16-bit unsigned integer + Ipp32u cksmItrLimit = (8 * n) / w; + for (Ipp32u i = 0; i < cksmItrLimit; i++) { + cksmQ = cksmQ + ((1 << w) - 1) - cpCoef(S, i, w); + } + cksmQ = cksmQ << ls; + + return cksmQ; +} + +#endif /* #ifndef IPPCP_LMOTS_H_ */ diff --git a/sources/include/lms_internal/lms.h b/sources/include/lms_internal/lms.h new file mode 100644 index 00000000..94a341d8 --- /dev/null +++ b/sources/include/lms_internal/lms.h @@ -0,0 +1,112 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#ifndef IPPCP_LMS_H_ +#define IPPCP_LMS_H_ + +#include "owndefs.h" +#include "owncp.h" +#include "lms_internal/lmots.h" + +#define CP_CKSM_BYTESIZE (2) +#define CP_PK_I_BYTESIZE (16) +#define CP_LMS_MAX_HASH_BYTESIZE (32) +#define CP_SIG_MAX_Y_WORDSIZE (265) + +/* Constants used to distinguish hashes in the system */ +#define D_PBLC (0x8080) +#define D_MESG (0x8181) +#define D_LEAF (0x8282) +#define D_INTR (0x8383) + +/* LMS algorithms params. "Table 2" LMS spec. */ +typedef struct { + Ipp32u m; + Ipp32u h; + IppsHashMethod* hash_method; +} cpLMSParams; + +/* + * Standard format of LMS public key: + * | u32str(type) || u32str(otstype) || I || T[1] | + * | 4 bytes || 4 bytes || 16 bytes || n bytes | +*/ +struct _cpLMSPublicKeyState { + Ipp32u _idCtx; // Pub key ctx identifier + IppsLMSAlgo lmsOIDAlgo; + IppsLMOTSAlgo lmotsOIDAlgo; + Ipp8u I[CP_PK_I_BYTESIZE]; + Ipp8u* T1; +}; + +/* + * Standard data format for LMS signature + * | 4 bytes || ... 
|| 4 bytes || n bytes || n bytes ||...|| n bytes | + * | q || lmots_sig || lms_sigtype || path[0] || path[1] ||...|| path[h-1] | + */ +struct _cpLMSSignatureState { + Ipp32u _idCtx; // Signature ctx identifier + Ipp32u _q; + _cpLMOTSSignatureState _lmotsSig; + IppsLMSAlgo _lmsOIDAlgo; + Ipp8u* _pAuthPath; + // path[0] || path[1] ||...|| path[h-1] + // C + // Y[0] || Y[1] ||...|| Y[p-1] +}; + +/* Defines to handle contexts IDs */ +#define CP_LMS_SET_CTX_ID(ctx) ((ctx)->_idCtx = (Ipp32u)idCtxLMS ^ (Ipp32u)IPP_UINT_PTR(ctx)) +#define CP_LMS_VALID_CTX_ID(ctx) ((((ctx)->_idCtx) ^ (Ipp32u)IPP_UINT_PTR(ctx)) == (Ipp32u)idCtxLMS) + +/* + * Set LMS parameters + * + * Returns: Reason: + * ippStsBadArgErr lmsOIDAlgo > Max value for IppsLMSAlgo + * lmsOIDAlgo < Min value for IppsLMSAlgo + * ippStsNoErr no errors + * + * Input parameters: + * lmsOIDAlgo id of LMS set of parameters + * + * Output parameters: + * params LMS parameters (h, m, hash_method) + */ +__IPPCP_INLINE IppStatus setLMSParams(IppsLMSAlgo lmsOIDAlgo, cpLMSParams* params) { + /* Set h */ + switch (lmsOIDAlgo % 5) { + case 0: { params->h = 5; break; } // LMS_SHA256_M32_H5 and LMS_SHA256_M24_H5 + case 1: { params->h = 10; break; } // LMS_SHA256_M32_H10 and LMS_SHA256_M24_H10 + case 2: { params->h = 15; break; } // LMS_SHA256_M32_H15 and LMS_SHA256_M24_H15 + case 3: { params->h = 20; break; } // LMS_SHA256_M32_H20 and LMS_SHA256_M24_H20 + case 4: { params->h = 25; break; } // LMS_SHA256_M32_H25 and LMS_SHA256_M24_H25 + default: return ippStsBadArgErr; + } + + if(lmsOIDAlgo <= LMS_SHA256_M32_H25) { + params->m = 32; + } + else { + params->m = 24; + } + + params->hash_method = (IppsHashMethod*) ippsHashMethod_SHA256_TT(); + + return ippStsNoErr; +} + +#endif /* #ifndef IPPCP_LMS_H_ */ diff --git a/sources/include/owndefs.h b/sources/include/owndefs.h index 7fa1a040..15df89d1 100644 --- a/sources/include/owndefs.h +++ b/sources/include/owndefs.h @@ -34,15 +34,15 @@ #include "ippcpdefs.h" #endif -#if 
!defined(__INLINE) +#if !defined(__IPPCP_INLINE) #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) || defined(_MSC_VER) - #define __INLINE static __inline + #define __IPPCP_INLINE static __inline #elif defined( __GNUC__ ) - #define __INLINE static __inline__ + #define __IPPCP_INLINE static __inline__ #else - #define __INLINE static + #define __IPPCP_INLINE static #endif -#endif /*__INLINE*/ +#endif /*__IPPCP_INLINE*/ /* TODO: to check ICX compiler */ #if !defined(__NOINLINE) @@ -59,7 +59,7 @@ #if defined(_MSC_VER) #define __FORCEINLINE __forceinline #elif defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) || defined( __GNUC__ ) - #define __FORCEINLINE __INLINE __attribute__((always_inline)) + #define __FORCEINLINE __IPPCP_INLINE __attribute__((always_inline)) #else #define __FORCEINLINE #endif @@ -267,7 +267,7 @@ #endif #if ((_IPP_ARCH == _IPP_ARCH_IA32)) -__INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) +__IPPCP_INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) { union { void* Ptr; @@ -277,7 +277,7 @@ __INLINE Ipp32s IPP_INT_PTR ( const void* ptr ) return dd.Int; } -__INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) { union { void* Ptr; @@ -287,7 +287,7 @@ __INLINE Ipp32u IPP_UINT_PTR( const void* ptr ) return dd.Int; } #elif ((_IPP_ARCH == _IPP_ARCH_EM64T) || (_IPP_ARCH == _IPP_ARCH_LRB2)) -__INLINE Ipp64s IPP_INT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp64s IPP_INT_PTR( const void* ptr ) { union { void* Ptr; @@ -297,7 +297,7 @@ __INLINE Ipp64s IPP_INT_PTR( const void* ptr ) return dd.Int; } -__INLINE Ipp64u IPP_UINT_PTR( const void* ptr ) +__IPPCP_INLINE Ipp64u IPP_UINT_PTR( const void* ptr ) { union { void* Ptr; @@ -386,7 +386,8 @@ typedef enum { idCtxSM3, idCtxAESXTS, idxCtxECES_SM2, - idCtxGFPECKE + idCtxGFPECKE, + idCtxLMS } IppCtxId; diff --git a/sources/include/stateful_sig_common/common.h b/sources/include/stateful_sig_common/common.h new file mode 100644 index 00000000..7f4bf150 --- 
/dev/null +++ b/sources/include/stateful_sig_common/common.h @@ -0,0 +1,34 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* + * Represent the `in` value as the `out` array whose length is `outlen` + * !Works only for big-endian data! + * + * Input parameters: + *     outlen   length of the resulting array + *     in       value that needs to be represented as an array + * Output parameters: + *     out      resulting array of bytes + */ + +__IPPCP_INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { + /* Iterate over out in decreasing order, for big-endianness. */ + for (Ipp32s i = outlen - 1; i >= 0; i--) { + out[i] = (Ipp8u)(in & 0xff); + in = in >> /*bitsize of 1 byte*/ 8; + } +} diff --git a/sources/include/utils.inc b/sources/include/utils.inc index 8d36d1aa..ac8dda6b 100644 --- a/sources/include/utils.inc +++ b/sources/include/utils.inc @@ -58,7 +58,7 @@ ; The macro searches intersection between two lists. ; Input: two comma-separated lists, enclosed in curly braces. ; Output: -; - Intersection will be located in the %$instersection context macro (can be empty). +; - Intersection will be located in the %$intersection context macro (can be empty). ; - Count of intersection elements list will be stored in the %$cardinality context variable.
%macro INTERSECT 2.nolist %ifnctx _INTERSECT_CTX_ diff --git a/sources/include/xmss_internal/wots.h b/sources/include/xmss_internal/wots.h index 25dc197b..89cc8832 100644 --- a/sources/include/xmss_internal/wots.h +++ b/sources/include/xmss_internal/wots.h @@ -20,6 +20,8 @@ #include "owndefs.h" #include "pcptool.h" +#include "stateful_sig_common/common.h" + // WOTS+ algorithms params. See 3.1.1. XMSS spec. typedef struct { Ipp32s n; @@ -60,7 +62,7 @@ IPP_OWN_DECL(IppStatus, WOTS_pkFromSig, (const Ipp8u* M, Ipp8u* sig, Ipp8u* pSee * adrs changed array of bytes */ -__INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ +__IPPCP_INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ adrs[4 * word_id + 3] = (Ipp8u) idx & 0xff; adrs[4 * word_id + 2] = (Ipp8u)(idx >> 8) & 0xff; adrs[4 * word_id + 1] = (Ipp8u)(idx >> 16) & 0xff; @@ -77,28 +79,10 @@ __INLINE void set_adrs_idx(Ipp8u* adrs, Ipp32u idx, int word_id){ * word_id int32 idx in the adrs array */ -__INLINE Ipp8u set_adrs_1_byte(int word_id){ +__IPPCP_INLINE Ipp8u set_adrs_1_byte(int word_id){ return (Ipp8u)(4 * word_id + 3); } -/* - * Represent the `in` value as the `out` array that length is `outlen` - * - * Input parameters: - * outlen length of resulted array - * in value that needs to be represent as an array - * Output parameters: - * out resulted array of bytes - */ - -__INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { - /* Iterate over out in decreasing order, for big-endianness. */ - for (Ipp32s i = outlen - 1; i >= 0; i--) { - out[i] = (Ipp8u)(in & 0xff); - in = in >> /*bitsize of 1 byte*/ 8; - } -} - /* * Implement a ceil function that returns the smallest integer greater than or equal to x. 
* @@ -106,7 +90,7 @@ __INLINE void toByte(Ipp8u *out, Ipp32s outlen, Ipp32u in) { * x double precision floating point value */ -__INLINE Ipp32s cpCeil(double x) { +__IPPCP_INLINE Ipp32s cpCeil(double x) { Ipp32s int_val = (Ipp32s) x; if(int_val == x || x <= 0.0){ return int_val; diff --git a/sources/include/xmss_internal/xmss.h b/sources/include/xmss_internal/xmss.h index 6145cda7..b765bd74 100644 --- a/sources/include/xmss_internal/xmss.h +++ b/sources/include/xmss_internal/xmss.h @@ -98,7 +98,7 @@ IPP_OWN_DECL(IppStatus, rand_hash, (Ipp8u* left, Ipp8u* right, Ipp8u* seed, * params WOTS parameters (w, log2_w, n, len, len_1, hash_method) */ -__INLINE IppStatus setXMSSParams(IppsXMSSAlgo OIDAlgo, Ipp32s* h, cpWOTSParams* params) { +__IPPCP_INLINE IppStatus setXMSSParams(IppsXMSSAlgo OIDAlgo, Ipp32s* h, cpWOTSParams* params) { // Digits below are from the XMSS algo spec // don't depend on the algo diff --git a/sources/ippcp/CMakeLists.txt b/sources/ippcp/CMakeLists.txt index 82389d48..18c9be24 100644 --- a/sources/ippcp/CMakeLists.txt +++ b/sources/ippcp/CMakeLists.txt @@ -184,7 +184,7 @@ if(IPP_REVISION) endif() # Enable tech-preview feature in the library -set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -DIPPCP_PREVIEW_XMSS") +set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -DIPPCP_PREVIEW_XMSS -DIPPCP_PREVIEW_LMS") set(LIBRARY_DEFINES "${LIBRARY_DEFINES} -D_NO_IPP_DEPRECATED") # do not warn about ippcp deprecated functions # set BN_OPENSSL_DISABLE for Intel IPP Cryptography @@ -232,6 +232,7 @@ file(GLOB LIBRARY_C_SOURCES_ORIGINAL ${IPP_CRYPTO_SOURCES_DIR}/ecnist/*.c ${IPP_CRYPTO_SOURCES_DIR}/sm2/*.c ${IPP_CRYPTO_SOURCES_DIR}/xmss/*.c + ${IPP_CRYPTO_SOURCES_DIR}/lms/*.c ) file(GLOB LIBRARY_ASM_SOURCES_ORIGINAL diff --git a/sources/ippcp/aes_gcm_avx512_structures.h b/sources/ippcp/aes_gcm_avx512_structures.h index 9be2fbe6..cc16b8f3 100644 --- a/sources/ippcp/aes_gcm_avx512_structures.h +++ b/sources/ippcp/aes_gcm_avx512_structures.h @@ -14,14 +14,14 @@ * limitations under the 
License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. -// AES GCM otimized for AVX512 and AVX512-VAES features +// AES GCM optimized for AVX512 and AVX512-VAES features // Internal Definitions -// -// +// +// */ #ifndef __AES_GCM_AVX512_STRUCTURES_H_ diff --git a/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm b/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm index 65050014..ad10136c 100644 --- a/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm +++ b/sources/ippcp/asm_ia32/pcpaesgcmg9as.asm @@ -170,6 +170,91 @@ IPPASM AesGcmPrecompute_avx,PUBLIC ret ENDFUNC AesGcmPrecompute_avx +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; void AesGcmPrecompute_avx2_vaes(const Ipp8u* pRefHkey, Ipp8u* pMultipliers); +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +align IPP_ALIGN_FACTOR +IPPASM AesGcmPrecompute_avx2_vaes,PUBLIC + USES_GPR esi + +%xdefine pHkey [esp + ARG_1 + 0*sizeof(dword)] ; pointer to the reflected hkey +%xdefine pMultipliers [esp + ARG_1 + 1*sizeof(dword)] ; output to the precomputed multipliers + + LD_ADDR esi, CONST_TABLE + + mov eax, pHkey + movdqu xmm0, oword [eax] ; xmm0 holds HashKey + pshufb xmm0, u128_str + + ; precompute HashKey<<1 mod poly from the HashKey + movdqa xmm4, xmm0 + psllq xmm0, 1 + psrlq xmm4, 63 + movdqa xmm3, xmm4 + pslldq xmm4, 8 + psrldq xmm3, 8 + por xmm0, xmm4 + ;reduction + pshufd xmm4, xmm3, 00100100b + pcmpeqd xmm4, oword TWOONE ; TWOONE = 0x00000001000000000000000000000001 + pand xmm4, oword POLY + pxor xmm0, xmm4 ; xmm0 holds the HashKey<<1 mod poly + + mov eax, pMultipliers + movdqu oword [eax+sizeof_oword_*0], xmm0 + + movdqa xmm1, xmm0 + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^2)<<1 mod poly + movdqu oword [eax+sizeof_oword_*1], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^3)<<1 mod poly + movdqu oword 
[eax+sizeof_oword_*2], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^4)<<1 mod poly + movdqu oword [eax+sizeof_oword_*3], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^5)<<1 mod poly + movdqu oword [eax+sizeof_oword_*4], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^6)<<1 mod poly + movdqu oword [eax+sizeof_oword_*5], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^7)<<1 mod poly + movdqu oword [eax+sizeof_oword_*6], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^8)<<1 mod poly + movdqu oword [eax+sizeof_oword_*7], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^9)<<1 mod poly + movdqu oword [eax+sizeof_oword_*8], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^10)<<1 mod poly + movdqu oword [eax+sizeof_oword_*9], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^11)<<1 mod poly + movdqu oword [eax+sizeof_oword_*10], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^12)<<1 mod poly + movdqu oword [eax+sizeof_oword_*11], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^13)<<1 mod poly + movdqu oword [eax+sizeof_oword_*12], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^14)<<1 mod poly + movdqu oword [eax+sizeof_oword_*13], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^15)<<1 mod poly + movdqu oword [eax+sizeof_oword_*14], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^16)<<1 mod poly + movdqu oword [eax+sizeof_oword_*15], xmm1 + + REST_GPR + ret +ENDFUNC AesGcmPrecompute_avx2_vaes + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; diff --git a/sources/ippcp/asm_ia32/pcpbnu.inc b/sources/ippcp/asm_ia32/pcpbnu.inc index 81a171cf..1bbefcc9 100644 --- a/sources/ippcp/asm_ia32/pcpbnu.inc 
+++ b/sources/ippcp/asm_ia32/pcpbnu.inc @@ -29,7 +29,7 @@ ; ;; -;; CMP_BNU comare arbitrary BNUs +;; CMP_BNU compare arbitrary BNUs ;; ;; input ;; rSrc1 points BNU1 diff --git a/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm b/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm index 3a6460d2..f57be534 100644 --- a/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm +++ b/sources/ippcp/asm_ia32/pcprij128safedecm5as.asm @@ -311,7 +311,7 @@ IPPASM Safe2Decrypt_RIJ128,PUBLIC mov ebp, esp ; save original esp to use it to reach parameters %xdefine pInp [ebp + ARG_1 + 0*sizeof(dword)] ; input buffer -%xdefine pOut [ebp + ARG_1 + 1*sizeof(dword)] ; outpu buffer +%xdefine pOut [ebp + ARG_1 + 1*sizeof(dword)] ; output buffer %xdefine nrounds [ebp + ARG_1 + 2*sizeof(dword)] ; number of rounds %xdefine pRK [ebp + ARG_1 + 3*sizeof(dword)] ; round keys %xdefine pSbox [ebp + ARG_1 + 4*sizeof(dword)] ; S-box diff --git a/sources/ippcp/asm_ia32/pcpvariant.inc b/sources/ippcp/asm_ia32/pcpvariant.inc index b458b867..64b5c32e 100644 --- a/sources/ippcp/asm_ia32/pcpvariant.inc +++ b/sources/ippcp/asm_ia32/pcpvariant.inc @@ -44,7 +44,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; it possible to force use of C-version of some implementtaions +;; it possible to force use of C-version of some implementations ;; instead of ASM one ;; %ifndef _USE_C_cpAdd_BNU_ diff --git a/sources/ippcp/asm_intel64/pcpaesgcme9as.asm b/sources/ippcp/asm_intel64/pcpaesgcme9as.asm index c80be6aa..62384c3b 100644 --- a/sources/ippcp/asm_intel64/pcpaesgcme9as.asm +++ b/sources/ippcp/asm_intel64/pcpaesgcme9as.asm @@ -221,7 +221,7 @@ my_pclmulqdq %%tmpX1, %%HK, 11h ;; tmpX1 = a1*b1 xmm1 = GH %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR POLY DQ 00000000000000001h,0C200000000000000h ;; 0xC2000000000000000000000000000001 @@ -231,11 +231,12 @@ MASK1 DQ 0ffffffffffffffffh,00000000000000000h ;; 0x0000000000000000ff MASK2 
DQ 00000000000000000h,0ffffffffffffffffh ;; 0xffffffffffffffff0000000000000000 INC_1 DQ 1,0 +segment .text align=IPP_ALIGN_FACTOR %assign sizeof_oword_ (16) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; -;;; void AesGcmPrecomute_avx(Ipp8u* pPrecomutedData, const Ipp8u* pHKey) +;;; void AesGcmPrecomute_avx(Ipp8u* pPrecomData, const Ipp8u* pHKey) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; align IPP_ALIGN_FACTOR @@ -247,7 +248,7 @@ IPPASM AesGcmPrecompute_avx,PUBLIC %xdefine pPrecomData rdi ; (rdi) pointer to the reflected multipliers reflect(hkey),(hkey<<1), (hkey^2)<<1, (hkey^4)<<1, %xdefine pHKey rsi ; (rsi) pointer to the Hkey value - movdqu xmm0, oword [rel pHKey] ; xmm0 holds HashKey + movdqu xmm0, oword [pHKey] ; xmm0 holds HashKey pshufb xmm0, [rel SHUF_CONST] ;movdqu oword [pPrecomData+sizeof_oword_*0], xmm0 @@ -280,6 +281,92 @@ IPPASM AesGcmPrecompute_avx,PUBLIC ret ENDFUNC AesGcmPrecompute_avx +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; void AesGcmPrecompute_avx2_vaes(Ipp8u* pPrecomputedData, const Ipp8u* pHKey) +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; calculates 16 hash keys - HashKey<<1, (HashKey^2)<<1, ..., (HashKey^16)<<1 +align IPP_ALIGN_FACTOR +IPPASM AesGcmPrecompute_avx2_vaes,PUBLIC + USES_GPR rdi,rsi + USES_XMM xmm6,xmm7,xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15 + COMP_ABI 2 + +%xdefine pPrecomputedData rdi ; (rdi) pointer to the reflected multipliers reflect(hkey),(hkey<<1), (hkey^2)<<1, (hkey^4)<<1, +%xdefine pHKey rsi ; (rsi) pointer to the Hkey value + + movdqu xmm0, oword [pHKey] ; xmm0 holds HashKey + pshufb xmm0, [rel SHUF_CONST] + + ; precompute HashKey<<1 mod poly from the HashKey + movdqa xmm4, xmm0 + psllq xmm0, 1 + psrlq xmm4, 63 + movdqa xmm3, xmm4 + pslldq xmm4, 8 + psrldq xmm3, 8 + por xmm0, xmm4 + ;reduction + pshufd 
xmm4, xmm3, 00100100b + pcmpeqd xmm4, oword [rel TWOONE] ; [TWOONE] = 0x00000001000000000000000000000001 + pand xmm4, oword [rel POLY] + pxor xmm0, xmm4 ; xmm0 holds the HashKey<<1 mod poly + + movdqu oword [pPrecomputedData+sizeof_oword_*0], xmm0 + + movdqa xmm1, xmm0 + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^2)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*1], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^3)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*2], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^4)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*3], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^5)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*4], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^6)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*5], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^7)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*6], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^8)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*7], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^9)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*8], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^10)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*9], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^11)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*10], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^12)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*11], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^13)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*12], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, 
xmm5 ; xmm1 holds (HashKey^14)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*13], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^15)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*14], xmm1 + + sse_clmul_gcm xmm1, xmm0, xmm3, xmm4, xmm5 ; xmm1 holds (HashKey^16)<<1 mod poly + movdqu oword [pPrecomputedData+sizeof_oword_*15], xmm1 + + REST_XMM + REST_GPR + ret +ENDFUNC AesGcmPrecompute_avx2_vaes + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; void AesGcmMulGcm_avx(Ipp8u* pHash, const Ipp8u* pHKey) @@ -294,14 +381,14 @@ IPPASM AesGcmMulGcm_avx,PUBLIC %xdefine pHash rdi ; (rdi) pointer to the Hash value %xdefine pHKey rsi ; (rsi) pointer to the (hkey<<1) value - movdqa xmm0, oword [rel pHash] + movdqa xmm0, oword [pHash] pshufb xmm0, [rel SHUF_CONST] - movdqa xmm1, oword [rel pHKey] + movdqa xmm1, oword [pHKey] sse_clmul_gcm xmm0, xmm1, xmm2, xmm3, xmm4 ; xmm0 holds Hash*HKey mod poly pshufb xmm0, [rel SHUF_CONST] - movdqa oword [rel pHash], xmm0 + movdqa oword [pHash], xmm0 REST_XMM REST_GPR @@ -326,15 +413,15 @@ IPPASM AesGcmAuth_avx,PUBLIC %assign BYTES_PER_BLK (16) - movdqa xmm0, oword [rel pHash] + movdqa xmm0, oword [pHash] pshufb xmm0, [rel SHUF_CONST] - movdqa xmm1, oword [rel pHKey] + movdqa xmm1, oword [pHKey] movsxd rdx, edx align IPP_ALIGN_FACTOR .auth_loop: - movdqu xmm2, oword [rel pSrc] ; src[] + movdqu xmm2, oword [pSrc] ; src[] pshufb xmm2, [rel SHUF_CONST] add pSrc, BYTES_PER_BLK pxor xmm0, xmm2 ; hash ^= src[] diff --git a/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm b/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm index a9674477..9a9139a5 100644 --- a/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm +++ b/sources/ippcp/asm_intel64/pcpaesgcmtable2ku8as.asm @@ -30,7 +30,7 @@ %if (_IPP32E >= _IPP32E_U8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR ; ; getAesGcmConst_table_ct provides c-e-t access to 
pre-computed Ipp16u AesGcmConst_table[256] @@ -49,6 +49,7 @@ align IPP_ALIGN_FACTOR INIT_IDX dw 000h,001h,002h,003h,004h,005h,006h,007h ;; initial search inx = {0:1:2:3:4:5:6:7} INCR_IDX dw 008h,008h,008h,008h,008h,008h,008h,008h ;; index increment = {8:8:8:8:8:8:8:8} +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM getAesGcmConst_table_ct,PRIVATE pxor xmm2, xmm2 ;; accumulator xmm2 = 0 diff --git a/sources/ippcp/asm_intel64/pcpbnum7.inc b/sources/ippcp/asm_intel64/pcpbnum7.inc index acd6ff93..9657f25e 100644 --- a/sources/ippcp/asm_intel64/pcpbnum7.inc +++ b/sources/ippcp/asm_intel64/pcpbnum7.inc @@ -111,7 +111,7 @@ %endmacro ;; -;; CMP_BNU comare BNUs +;; CMP_BNU compare BNUs ;; ;; input ;; rSrc1 points BNU1 diff --git a/sources/ippcp/asm_intel64/pcpmred.inc b/sources/ippcp/asm_intel64/pcpmred.inc index 5bbc8e4a..bbbd1adc 100644 --- a/sources/ippcp/asm_intel64/pcpmred.inc +++ b/sources/ippcp/asm_intel64/pcpmred.inc @@ -474,7 +474,7 @@ DECLARE_FUNC mred_N,PRIVATE lea rdi, [rdi+rdx*sizeof(qword)] - ; accumulate carryLCL and update hight product above + ; accumulate carryLCL and update high product above pop rax shr rax, 1 mov rbx, rdx @@ -502,7 +502,7 @@ DECLARE_FUNC mred_N,PRIVATE pop rbp ; mul_8xn procedure - ; accumulate carryGBL and store hight product above + ; accumulate carryGBL and store high product above pop rbx ; carryGBL add r8, rbx mov qword [rdi+sizeof(qword)*0], r8 diff --git a/sources/ippcp/asm_intel64/pcprij128ccme9as.asm b/sources/ippcp/asm_intel64/pcprij128ccme9as.asm index 2198333a..450c9b31 100644 --- a/sources/ippcp/asm_intel64/pcprij128ccme9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128ccme9as.asm @@ -31,12 +31,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR u128_str DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 increment DQ 1,0 +segment .text 
align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: Authenticate and Encrypt ;* diff --git a/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm b/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm index 1fb20d81..c0e93766 100644 --- a/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128encryptctrpipee9as.asm @@ -31,12 +31,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR u128_str DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +segment .text align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: pipelined RIJ128 CTR encryption/decryption ;* diff --git a/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm b/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm index f86abb2e..1e2d0471 100644 --- a/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm +++ b/sources/ippcp/asm_intel64/pcprij128encryptxtse9as.asm @@ -30,13 +30,13 @@ %if (_AES_NI_ENABLING_ == _FEATURE_ON_) || (_AES_NI_ENABLING_ == _FEATURE_TICKTOCK_) %if (_IPP32E >= _IPP32E_Y8) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ALPHA_MUL_CNT dq 00000000000000087h, 00000000000000001h - +segment .text align=IPP_ALIGN_FACTOR ;*************************************************************** ;* Purpose: AES-XTS encryption ;* diff --git a/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm b/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm index 0648b82b..c6c44880 100644 --- a/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm +++ b/sources/ippcp/asm_intel64/pcprij128safedecu8as.asm @@ -107,9 +107,7 @@ PLOOKUP_MEM %%xmmB, %%xmmT, GF16_expTbl %endmacro -segment .text align=IPP_ALIGN_FACTOR - - +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR 
DECODE_DATA: @@ -268,7 +266,7 @@ GF16mul_2_6x \ ColumnROR \ DB 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12 - +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;************************************************************* ;* void SafeDecrypt_RIJ128(const Ipp8u* pInpBlk, diff --git a/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm b/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm index 5d3827b9..d75f112b 100644 --- a/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm +++ b/sources/ippcp/asm_intel64/pcprij128safeencu8as.asm @@ -107,7 +107,7 @@ PLOOKUP_MEM %%xmmB, %%xmmT, GF16_expTbl %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -256,6 +256,7 @@ ColumnROR \ DB 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12 +segment .text align=IPP_ALIGN_FACTOR ;************************************************************* ; convert GF(2^128) -> GF((2^4)^2) ;************************************************************* diff --git a/sources/ippcp/asm_intel64/pcpsha1e9as.asm b/sources/ippcp/asm_intel64/pcpsha1e9as.asm index 164fa7c3..badb442a 100644 --- a/sources/ippcp/asm_intel64/pcpsha1e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1e9as.asm @@ -290,7 +290,7 @@ mov %%hash0, %%hashAdd %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -305,6 +305,7 @@ shuffle_mask DD 00010203h DD 08090a0bh DD 0c0d0e0fh +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha1l9as.asm b/sources/ippcp/asm_intel64/pcpsha1l9as.asm index 6ab1b1c1..14b9fdbd 100644 --- a/sources/ippcp/asm_intel64/pcpsha1l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1l9as.asm @@ -331,7 +331,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data 
align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -344,7 +344,7 @@ SHA1_YMM_K dd 05a827999h, 05a827999h, 05a827999h, 05a827999h, 05a827999h, 05 SHA1_YMM_BF dd 00010203h,04050607h,08090a0bh,0c0d0e0fh dd 00010203h,04050607h,08090a0bh,0c0d0e0fh - +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* @@ -365,7 +365,7 @@ IPPASM UpdateSHA1,PUBLIC %xdefine MBS_SHA1 (64) - mov r15, rsp ; store orifinal rsp + mov r15, rsp ; store original rsp and rsp, -IPP_ALIGN_FACTOR ; 32-byte aligned stack movsxd r14, edx ; input length in bytes diff --git a/sources/ippcp/asm_intel64/pcpsha1m7as.asm b/sources/ippcp/asm_intel64/pcpsha1m7as.asm index dee0e7d1..d3e4cea9 100644 --- a/sources/ippcp/asm_intel64/pcpsha1m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1m7as.asm @@ -258,9 +258,9 @@ mov [rsp+(%%nr & 0Fh)*4],%%regU %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha1nias.asm b/sources/ippcp/asm_intel64/pcpsha1nias.asm index 412cbea4..5d2f21b7 100644 --- a/sources/ippcp/asm_intel64/pcpsha1nias.asm +++ b/sources/ippcp/asm_intel64/pcpsha1nias.asm @@ -31,8 +31,7 @@ %if (_SHA_NI_ENABLING_ == _FEATURE_ON_) || (_SHA_NI_ENABLING_ == _FEATURE_TICKTOCK_) ;;%if (_IPP32E >= _IPP32E_Y8 ) -segment .text align=IPP_ALIGN_FACTOR - +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -41,6 +40,7 @@ UPPER_DWORD_MASK \ PSHUFFLE_BYTE_FLIP_MASK \ DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block diff --git 
a/sources/ippcp/asm_intel64/pcpsha1u8as.asm b/sources/ippcp/asm_intel64/pcpsha1u8as.asm index 58eccb4f..08355cad 100644 --- a/sources/ippcp/asm_intel64/pcpsha1u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsha1u8as.asm @@ -303,7 +303,7 @@ mov %%hash0, %%hashAdd %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -319,6 +319,7 @@ shuffle_mask DD 00010203h DD 0c0d0e0fh +segment .text align=IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block ;* diff --git a/sources/ippcp/asm_intel64/pcpsha256e9as.asm b/sources/ippcp/asm_intel64/pcpsha256e9as.asm index ba656048..54bcd0c4 100644 --- a/sources/ippcp/asm_intel64/pcpsha256e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256e9as.asm @@ -281,7 +281,7 @@ %endif %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -289,6 +289,7 @@ SHUFB_BSWAP DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 SHUFD_ZZ10 DB 0,1,2,3, 8,9,10,11, 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh SHUFD_32ZZ DB 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh, 0,1,2,3, 8,9,10,11 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha256l9as.asm b/sources/ippcp/asm_intel64/pcpsha256l9as.asm index 2496f343..89a6a610 100644 --- a/sources/ippcp/asm_intel64/pcpsha256l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256l9as.asm @@ -391,7 +391,7 @@ vmovdqa YMMWORD [rsi+(%%W_AHEAD/4)*sizeof(ymmword)+(%%round/4)*sizeof(ymmword)] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -421,6 +421,7 @@ SHA256_zzBA db 0,1,2,3, 8,9,10,11, 
0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh db 0,1,2,3, 8,9,10,11, 0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh,0ffh +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha256m7as.asm b/sources/ippcp/asm_intel64/pcpsha256m7as.asm index a8f9643d..b60fe3e8 100644 --- a/sources/ippcp/asm_intel64/pcpsha256m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256m7as.asm @@ -161,7 +161,6 @@ mov [rsp+((%%nr-16) & 0Fh)*4], %%sig0 %endmacro -segment .text align=IPP_ALIGN_FACTOR ;****************************************************************************************** @@ -191,12 +190,13 @@ segment .text align=IPP_ALIGN_FACTOR ;; Caller = ippsHMACSHA224MessageDigest ;; - +segment .data align=IPP_ALIGN_FACTOR %if (_IPP32E >= _IPP32E_U8) align IPP_ALIGN_FACTOR pByteSwp DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 %endif +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM UpdateSHA256,PUBLIC %assign LOCAL_FRAME (16*sizeof(dword) + sizeof(qword)) diff --git a/sources/ippcp/asm_intel64/pcpsha256nias.asm b/sources/ippcp/asm_intel64/pcpsha256nias.asm index 8c47ab9c..e2d3838a 100644 --- a/sources/ippcp/asm_intel64/pcpsha256nias.asm +++ b/sources/ippcp/asm_intel64/pcpsha256nias.asm @@ -31,13 +31,14 @@ %if (_SHA_NI_ENABLING_ == _FEATURE_ON_) || (_SHA_NI_ENABLING_ == _FEATURE_TICKTOCK_) ;;%if (_IPP32E >= _IPP32E_Y8 ) -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR PSHUFFLE_BYTE_FLIP_MASK \ DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR ;***************************************************************************************** ;* Purpose: Update internal digest according to message block diff --git a/sources/ippcp/asm_intel64/pcpsha256u8as.asm 
b/sources/ippcp/asm_intel64/pcpsha256u8as.asm index d2920547..7df1ab13 100644 --- a/sources/ippcp/asm_intel64/pcpsha256u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsha256u8as.asm @@ -271,12 +271,13 @@ movdqa %%xS, %%xS0 %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR pByteSwp DB 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA256(Ipp32u digest[], Ipp8u dataBlock[], int datalen, Ipp32u K_256[]) diff --git a/sources/ippcp/asm_intel64/pcpsha512e9as.asm b/sources/ippcp/asm_intel64/pcpsha512e9as.asm index a260b9e3..b28a869c 100644 --- a/sources/ippcp/asm_intel64/pcpsha512e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512e9as.asm @@ -466,11 +466,12 @@ ROTATE_W %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR SHUFB_BSWAP DB 7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8 +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; UpdateSHA512(Ipp64u digest[], Ipp8u dataBlock[], int datalen, Ipp64u K_512[]) diff --git a/sources/ippcp/asm_intel64/pcpsha512l9as.asm b/sources/ippcp/asm_intel64/pcpsha512l9as.asm index d5641e03..51abc748 100644 --- a/sources/ippcp/asm_intel64/pcpsha512l9as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512l9as.asm @@ -354,7 +354,7 @@ vmovdqa YMMWORD [rsi+(%%W_AHEAD/2)*sizeof(ymmword)+(%%nr/2)*sizeof(ymmword)],yT ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -409,6 +409,7 @@ SHA512_YMM_BF dq 00001020304050607h, 008090a0b0c0d0e0fh, 00001020304050607h, 00 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR 
IPPASM UpdateSHA512,PUBLIC %assign LOCAL_FRAME (sizeof(qword)*4 + sizeof(qword)*80*2) diff --git a/sources/ippcp/asm_intel64/pcpsha512m7as.asm b/sources/ippcp/asm_intel64/pcpsha512m7as.asm index 84be6606..f17bbcb8 100644 --- a/sources/ippcp/asm_intel64/pcpsha512m7as.asm +++ b/sources/ippcp/asm_intel64/pcpsha512m7as.asm @@ -208,10 +208,12 @@ segment .text align=IPP_ALIGN_FACTOR %xdefine KK_SHA512 rbp %if _IPP32E >= _IPP32E_U8 +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR pByteSwp DB 7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8 %endif +segment .text align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR IPPASM UpdateSHA512,PUBLIC %assign LOCAL_FRAME (16*sizeof(qword)+sizeof(qword)) diff --git a/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm b/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm index 1c3eeb13..97bb741c 100644 --- a/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm +++ b/sources/ippcp/asm_intel64/pcpsm2pfuncs_montas.asm @@ -28,7 +28,7 @@ %assign _xEMULATION_ 1 %assign _ADCX_ADOX_ 1 -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -42,7 +42,7 @@ LOne DD 1,1,1,1,1,1,1,1 LTwo DD 2,2,2,2,2,2,2,2 LThree DD 3,3,3,3,3,3,3,3 - +segment .text align=IPP_ALIGN_FACTOR ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; void sm2_mul_by_2(uint64_t res[4], uint64_t a[4]); diff --git a/sources/ippcp/asm_intel64/pcpsm3e9as.asm b/sources/ippcp/asm_intel64/pcpsm3e9as.asm index 02cead2e..b3eb3139 100644 --- a/sources/ippcp/asm_intel64/pcpsm3e9as.asm +++ b/sources/ippcp/asm_intel64/pcpsm3e9as.asm @@ -687,7 +687,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;; %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR @@ -697,6 +697,7 @@ rol_32_8 DB 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 bcast DB 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 wzzz DB 80h,80h,80h,80h, 80h,80h,80h,80h, 80h,80h,80h,80h,12,13,14,15 +segment .text align=IPP_ALIGN_FACTOR 
;******************************************************************** ;* void UpdateSM3(Ipp32u* hash, ; const Ipp8u* msg, int msgLen, diff --git a/sources/ippcp/asm_intel64/pcpsm3u8as.asm b/sources/ippcp/asm_intel64/pcpsm3u8as.asm index 495b8b8c..1e747c37 100644 --- a/sources/ippcp/asm_intel64/pcpsm3u8as.asm +++ b/sources/ippcp/asm_intel64/pcpsm3u8as.asm @@ -195,12 +195,13 @@ mov [rsp+ctr*sizeof(dword)+%%i*sizeof(dword)], t3 %endmacro -segment .text align=IPP_ALIGN_FACTOR +segment .data align=IPP_ALIGN_FACTOR align IPP_ALIGN_FACTOR bswap128 DB 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12 +segment .text align=IPP_ALIGN_FACTOR ;******************************************************************** ;* void UpdateSM3(uint32_t hash[8], ; const uint32_t msg[16], int msgLen, diff --git a/sources/ippcp/asm_intel64/pcpvariant.inc b/sources/ippcp/asm_intel64/pcpvariant.inc index f13fc807..528646ca 100644 --- a/sources/ippcp/asm_intel64/pcpvariant.inc +++ b/sources/ippcp/asm_intel64/pcpvariant.inc @@ -44,7 +44,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; it possible to force use of C-version of some implementtaions +;; it possible to force use of C-version of some implementations ;; instead of ASM one ;; %ifndef _USE_C_cpAdd_BNU_ diff --git a/sources/ippcp/crypto_mb/Readme.md b/sources/ippcp/crypto_mb/Readme.md index 72f6835b..a7958326 100644 --- a/sources/ippcp/crypto_mb/Readme.md +++ b/sources/ippcp/crypto_mb/Readme.md @@ -1,7 +1,7 @@ # Crypto Multi-buffer Library Currently, the library provides optimized version of the following algorithms: -1. RSA, ECDSA, ECDH, x25519, SM2 multi-buffer algorithms based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) integer fused multiply-add (IFMA) operations. This CPU feature is introduced with Intel® Microarchitecture Code Named Ice Lake. +1. RSA, ECDSA, ECDH, x25519, SM2 multi-buffer algorithms based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) integer fused multiply-add (IFMA) operations. 
This CPU feature is introduced with Intel® Microarchitecture Code Named Ice Lake. 2. SM4 based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) GFNI instructions. 3. SM3 based on Intel® Advanced Vector Extensions 512 (Intel® AVX-512) instructions. @@ -32,6 +32,7 @@ This library consists of highly-optimized kernels taking advantage of Intel’s - GCC 11.4 - Clang 9.0 - Clang 12.0 +- Clang 16.0 - GNU binutils 2.32 ### Windows* OS @@ -96,11 +97,11 @@ You can find the installed files in: ``` > **Note**: This project uses the default `RPATH` settings: > -> CMake is linking the executables and shared libraries with full `RPATH` to all used -> libraries in the build tree. When installing, CMake will clear the `RPATH` of these -> targets so they are installed with an empty `RPATH`. -> In this case to resolve the Crypto Multi-buffer Library dependency on OpenSSL it is -> necessary to update `LD_LIBRARY_PATH` with the path to the target OpenSSL library. +> CMake is linking the executables and shared libraries with full `RPATH` to all used +> libraries in the build tree. When installing, CMake will clear the `RPATH` of these +> targets so they are installed with an empty `RPATH`. +> In this case to resolve the Crypto Multi-buffer Library dependency on OpenSSL it is +> necessary to update `LD_LIBRARY_PATH` with the path to the target OpenSSL library. 
## How to Build diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/defs.h b/sources/ippcp/crypto_mb/include/crypto_mb/defs.h index 085376ae..a52516ab 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/defs.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/defs.h @@ -33,8 +33,8 @@ typedef unsigned long long int64u; #define __ALIGN64 __attribute__((aligned(64))) #endif - #if !defined(__INLINE) - #define __INLINE static __inline__ + #if !defined(__MBX_INLINE) + #define __MBX_INLINE static __inline__ #endif #if !defined(__NOINLINE) @@ -45,8 +45,8 @@ typedef unsigned long long int64u; #define __ALIGN64 __declspec(align(64)) #endif - #if !defined(__INLINE) - #define __INLINE static __forceinline + #if !defined(__MBX_INLINE) + #define __MBX_INLINE static __forceinline #endif #if !defined(__NOINLINE) diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h b/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h index 5a06286b..0f6461fe 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/fips_cert.h @@ -109,7 +109,7 @@ EXTERN_C fips_test_status fips_selftest_mbx_rsa4k_private_crt_ssl_mb8(void); #endif // BN_OPEN_SSL_DISABLE -/* +/* // Enumerator that contains information about FIPS-approved // functions inside the crypto_mb cryptographic boundary */ @@ -138,11 +138,11 @@ enum FIPS_CRYPTO_MB_FUNC { nistp256_ecdsa_verify_mb8, nistp384_ecdsa_verify_mb8, nistp521_ecdsa_verify_mb8, - + ed25519_public_key_mb8, ed25519_sign_mb8, ed25519_verify_mb8, - + rsa_public_mb8, rsa_private_mb8, rsa_private_crt_mb8, @@ -170,7 +170,7 @@ enum FIPS_CRYPTO_MB_FUNC { nistp256_ecdsa_verify_ssl_mb8, nistp384_ecdsa_verify_ssl_mb8, nistp521_ecdsa_verify_ssl_mb8, - + rsa_public_ssl_mb8, rsa_private_ssl_mb8, rsa_private_crt_ssl_mb8, @@ -184,7 +184,7 @@ enum FIPS_CRYPTO_MB_FUNC { x25519_public_key_mb8, x25519_mb8, - + sm2_ecpublic_key_mb8, sm2_ecdh_mb8, sm2_ecdsa_sign_mb8, @@ -233,20 +233,20 @@ enum FIPS_CRYPTO_MB_FUNC { 
/** * \brief * - * An indicator if a function is FIPS-approved or not - * - * \param[in] function member of FIPS_CRYPTO_MB_FUNC enumerator + * An indicator if a function is FIPS-approved or not + * + * \param[in] function member of FIPS_CRYPTO_MB_FUNC enumerator * that corresponds to API being checked. * \return func_fips_approved equal to 1 if FIPS-approved algorithm is used * - * Example: - * Library API FIPS_CRYPTO_MB_FUNC + * Example: + * Library API FIPS_CRYPTO_MB_FUNC * mbx_rsa_public_mb8 -> rsa_public_mb8 * mbx_nistp256_ecdh_mb8 -> nistp256_ecdh_mb8 * mbx_ -> - * + * */ -__INLINE func_fips_approved mbx_is_fips_approved_func(enum FIPS_CRYPTO_MB_FUNC function) +__MBX_INLINE func_fips_approved mbx_is_fips_approved_func(enum FIPS_CRYPTO_MB_FUNC function) { return ((int)function > 0); } diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/status.h b/sources/ippcp/crypto_mb/include/crypto_mb/status.h index 16b395cd..7d5f3722 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/status.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/status.h @@ -29,23 +29,23 @@ typedef int64u mbx_status16; #define MBX_STATUS_LOW_ORDER_ERR (4) #define MBX_STATUS_SIGNATURE_ERR (8) -__INLINE mbx_status MBX_SET_STS(mbx_status status, int numb, mbx_status sttVal) +__MBX_INLINE mbx_status MBX_SET_STS(mbx_status status, int numb, mbx_status sttVal) { numb &= 7; /* 0 <= numb < 8 */ status &= (mbx_status)(~(0xF << (numb*4))); return status |= (sttVal & 0xF) << (numb*4); } -__INLINE mbx_status MBX_GET_STS(mbx_status status, int numb) +__MBX_INLINE mbx_status MBX_GET_STS(mbx_status status, int numb) { return (status >>(numb*4)) & 0xF; } -__INLINE mbx_status MBX_SET_STS_ALL(mbx_status stsVal) +__MBX_INLINE mbx_status MBX_SET_STS_ALL(mbx_status stsVal) { return (stsVal<<4*7) | (stsVal<<4*6) | (stsVal<<4*5) | (stsVal<<4*4) | (stsVal<<4*3) | (stsVal<<4*2) | (stsVal<<4*1) | stsVal; } -__INLINE mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_status sttVal) +__MBX_INLINE 
mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_status sttVal) { int numb; @@ -56,7 +56,7 @@ __INLINE mbx_status MBX_SET_STS_BY_MASK(mbx_status status, int8u mask, mbx_statu return status; } -__INLINE int MBX_IS_ANY_OK_STS(mbx_status status) +__MBX_INLINE int MBX_IS_ANY_OK_STS(mbx_status status) { int ret = MBX_STATUS_OK==MBX_GET_STS(status, 0) || MBX_STATUS_OK==MBX_GET_STS(status, 1) @@ -74,30 +74,30 @@ __INLINE int MBX_IS_ANY_OK_STS(mbx_status status) */ /* Accessors for the low and high part of 64-bit status */ -__INLINE mbx_status MBX_GET_HIGH_PART_STS16(mbx_status16 status16) +__MBX_INLINE mbx_status MBX_GET_HIGH_PART_STS16(mbx_status16 status16) { return ((mbx_status)(((mbx_status16)(status16) >> 32) & 0xFFFFFFFF)); } -__INLINE mbx_status MBX_GET_LOW_PART_STS16(mbx_status16 status16) +__MBX_INLINE mbx_status MBX_GET_LOW_PART_STS16(mbx_status16 status16) { return ((mbx_status)(status16)); } -__INLINE mbx_status16 MBX_SET_STS16_ALL(mbx_status16 stsVal) +__MBX_INLINE mbx_status16 MBX_SET_STS16_ALL(mbx_status16 stsVal) { return (stsVal<<4*15) | (stsVal<<4*14) | (stsVal<<4*13) | (stsVal<<4*12) | (stsVal<<4*11) | (stsVal<<4*10) | (stsVal<<4*9) | (stsVal<<4*8) | \ (stsVal<<4*7) | (stsVal<<4*6) | (stsVal<<4*5) | (stsVal<<4*4) | (stsVal<<4*3) | (stsVal<<4*2) | (stsVal<<4*1) | stsVal; } - -__INLINE mbx_status16 MBX_SET_STS16(mbx_status16 status, int numb, mbx_status16 sttVal) + +__MBX_INLINE mbx_status16 MBX_SET_STS16(mbx_status16 status, int numb, mbx_status16 sttVal) { numb &= 15; /* 0 <= numb < 16 */ status &= (mbx_status16)(~((int64u)0xF << (numb*4))); return status |= (sttVal & 0xF) << (numb*4); } -__INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mbx_status16 sttVal) +__MBX_INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mbx_status16 sttVal) { int numb; for (numb = 0; numb < 16; numb++) { @@ -107,7 +107,7 @@ __INLINE mbx_status16 MBX_SET_STS16_BY_MASK(mbx_status16 status, int16u mask, mb 
return status; } -__INLINE int MBX_IS_ANY_OK_STS16(mbx_status16 status) +__MBX_INLINE int MBX_IS_ANY_OK_STS16(mbx_status16 status) { return MBX_IS_ANY_OK_STS(MBX_GET_HIGH_PART_STS16(status)) || \ MBX_IS_ANY_OK_STS(MBX_GET_LOW_PART_STS16(status)); diff --git a/sources/ippcp/crypto_mb/include/crypto_mb/version.h b/sources/ippcp/crypto_mb/include/crypto_mb/version.h index 646111ed..e9927c28 100644 --- a/sources/ippcp/crypto_mb/include/crypto_mb/version.h +++ b/sources/ippcp/crypto_mb/include/crypto_mb/version.h @@ -21,14 +21,14 @@ /* crypto_mb name & version */ #define MBX_LIB_NAME() "crypto_mb" -#define MBX_VER_MAJOR 1 +#define MBX_VER_MAJOR 2 #define MBX_VER_MINOR 0 -#define MBX_VER_REV 13 +#define MBX_VER_REV 0 /* major interface version */ -#define MBX_INTERFACE_VERSION_MAJOR 11 +#define MBX_INTERFACE_VERSION_MAJOR 12 /* minor interface version */ -#define MBX_INTERFACE_VERSION_MINOR 14 +#define MBX_INTERFACE_VERSION_MINOR 0 typedef struct { int major; /* e.g. 1 */ diff --git a/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h b/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h index d154d244..7f0d254b 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h +++ b/sources/ippcp/crypto_mb/include/internal/common/crypto_mb_ver.h @@ -29,6 +29,12 @@ #define MBX_BUILD() 1043 #define MBX_VERSION() MBX_BASE_VERSION(),MBX_BUILD() +#ifndef STR_MBX_BASE_VERSION +#define STR_MBX_BASE_VERSION() STR(MBX_VER_MAJOR) "," STR(MBX_VER_MINOR) "," STR(MBX_VER_REV) +#endif + +#define STR_FILE_MBX_VERSION() STR_MBX_BASE_VERSION()"," STR(MBX_BUILD()) + #ifndef MBX_STR_VERSION #define MBX_STR_VERSION() STR(MBX_VER_MAJOR) "." STR(MBX_VER_MINOR) "." STR(MBX_VER_REV) " (" STR(MBX_INTERFACE_VERSION_MAJOR) "." 
STR(MBX_INTERFACE_VERSION_MINOR) ")" #endif diff --git a/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h b/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h index 19b966b2..3614e017 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h +++ b/sources/ippcp/crypto_mb/include/internal/common/ifma_math.h @@ -34,30 +34,30 @@ #define SIMD_BYTES (SIMD_LEN/8) #define MB_WIDTH (SIMD_LEN/64) - __INLINE U64 loadu64(const void *p) { + __MBX_INLINE U64 loadu64(const void *p) { return _mm512_loadu_si512((U64*)p); } - __INLINE U64 loadstream64(const void *p) { + __MBX_INLINE U64 loadstream64(const void *p) { return _mm512_stream_load_si512 ((U64*)p); } - __INLINE void storeu64(const void *p, U64 v) { + __MBX_INLINE void storeu64(const void *p, U64 v) { _mm512_storeu_si512((U64*)p, v); } #define mask_mov64 _mm512_mask_mov_epi64 #define set64 _mm512_set1_epi64 - __INLINE U64 fma52lo(U64 a, U64 b, U64 c) { + __MBX_INLINE U64 fma52lo(U64 a, U64 b, U64 c) { return _mm512_madd52lo_epu64(a, b, c); } - __INLINE U64 fma52hi(U64 a, U64 b, U64 c) { + __MBX_INLINE U64 fma52hi(U64 a, U64 b, U64 c) { return _mm512_madd52hi_epu64(a, b, c); } - __INLINE U64 mul52lo(U64 b, U64 c) { + __MBX_INLINE U64 mul52lo(U64 b, U64 c) { return _mm512_madd52lo_epu64(_mm512_setzero_si512(), b, c); } @@ -73,7 +73,7 @@ __asm__ ( "vpmadd52huq " #o "(%2), %1, %0" : "+x" (r): "x" (b), "r" (c) ); \ } - __INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { + __MBX_INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { __asm__("vmovdqu64 %2, %%zmm0 \n" "vpblendmq %%zmm0, %0, %0 %{%1%} \n" : "+v"(v) @@ -81,9 +81,9 @@ : "zmm0"); return v; } - + #else - // Use IFMA instrinsics for all other compilers + // Use IFMA intrinsics for all other compilers #define _mm512_madd52lo_epu64_(r, a, b, c, o) {\ r=fma52lo(a, b, _mm512_loadu_si512((U64*)(((char*)c)+o))); \ } @@ -93,48 +93,48 @@ } #pragma optimize("", off) - __INLINE U64 select64(__mb_mask k, U64 v, U64 *d) { + __MBX_INLINE U64 
select64(__mb_mask k, U64 v, U64 *d) { return _mm512_mask_blend_epi64(k, v, _mm512_load_si512(d)); } - + #pragma optimize("", on) #endif #define fma52lo_mem(r, a, b, c, o) _mm512_madd52lo_epu64_(r, a, b, c, o) // gres #define fma52hi_mem(r, a, b, c, o) _mm512_madd52hi_epu64_(r, a, b, c, o) // gres - __INLINE U64 add64(U64 a, U64 b) { + __MBX_INLINE U64 add64(U64 a, U64 b) { return _mm512_add_epi64(a, b); } - __INLINE U64 sub64(U64 a, U64 b) { + __MBX_INLINE U64 sub64(U64 a, U64 b) { return _mm512_sub_epi64(a, b); } - __INLINE U64 get_zero64() { + __MBX_INLINE U64 get_zero64() { return _mm512_setzero_si512(); } - __INLINE void set_zero64(U64 *a) { + __MBX_INLINE void set_zero64(U64 *a) { *a = _mm512_xor_si512(*a, *a); } - __INLINE U64 set1(unsigned long long a) { + __MBX_INLINE U64 set1(unsigned long long a) { return _mm512_set1_epi64((long long)a); } - __INLINE U64 srli64(U64 a, int s) { + __MBX_INLINE U64 srli64(U64 a, int s) { return _mm512_srli_epi64(a, s); } #define srai64 _mm512_srai_epi64 #define slli64 _mm512_slli_epi64 - __INLINE U64 and64_const(U64 a, unsigned long long mask) { + __MBX_INLINE U64 and64_const(U64 a, unsigned long long mask) { return _mm512_and_epi64(a, _mm512_set1_epi64((long long)mask)); } - __INLINE U64 and64(U64 a, U64 mask) { + __MBX_INLINE U64 and64(U64 a, U64 mask) { return _mm512_and_epi64(a, mask); } @@ -150,7 +150,7 @@ #define mask_sub64 _mm512_mask_sub_epi64 #define maskz_sub64 _mm512_maskz_sub_epi64 - __INLINE __mb_mask is_zero(U64* p, int len) { + __MBX_INLINE __mb_mask is_zero(U64* p, int len) { U64 Z = p[0]; for(int i = 1; i < len; i++) { Z = or64(Z, p[i]); @@ -164,7 +164,7 @@ #else #define mask_xor _kxor_mask8 #endif - + #define get_mask(a) (a) #define get_mask_value(a) (a) @@ -196,7 +196,7 @@ X5_ = _mm512_mask_shuffle_i64x2(X45H, 0b11001111, X0123H, X67H, 0b10001000 ); \ X7_ = _mm512_mask_shuffle_i64x2(X67H, 0b00111111, X0123H, X45H, 0b10111101 ); \ } - + #else #error "Incorrect SIMD length" #endif // SIMD_LEN diff --git 
a/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h b/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h index 8695e0f4..6dfa9db5 100644 --- a/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h +++ b/sources/ippcp/crypto_mb/include/internal/common/mem_fns.h @@ -17,7 +17,7 @@ /* * Auxiliary functions to set and copy memory */ -__INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) +__MBX_INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) { const int8u* s = (int8u*)pSrc; int8u* d = (int8u*)pDst; @@ -26,7 +26,7 @@ __INLINE void CopyBlock(const void* pSrc, void* pDst, int numBytes) d[k] = s[k]; } -__INLINE void PadBlock(int8u paddingByte, void* pDst, int numBytes) +__MBX_INLINE void PadBlock(int8u paddingByte, void* pDst, int numBytes) { int8u* d = (int8u*)pDst; int k; diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h index 028ce94a..ca3d26ba 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p256.h @@ -49,23 +49,23 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE256_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE256_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE256_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE256_)(const U64 T[]) { U64 Z = or64(or64(T[0], T[1]), or64(or64(T[2], T[3]), T[4])); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = 
a[1]; @@ -75,7 +75,8 @@ __INLINE void MB_FUNC_NAME(mov_FE256_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -84,7 +85,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE256_)(U64 R[], const U64 B[], __mb_mask k, R[4] = mask_mov64(B[4], k, A[4]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -94,7 +95,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE256_)(U64 R[], U64 B[], __mb_mask k } /* compare two FE */ -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -113,7 +114,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE256_)(const U64 A[], const U64 B[]) return cmp64_mask(r4, get_zero64(), _MM_CMPINT_LT); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) { __ALIGN64 U64 msg[P256_LEN52]; @@ -122,7 +123,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE256_)(const U64 A[], const U64 B[]) msg[2] = xor64(A[2], B[2]); msg[3] = xor64(A[3], B[3]); msg[4] = xor64(A[4], B[4]); - + return MB_FUNC_NAME(is_zero_FE256_)(msg); } diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h index 7414ccc8..2f043a09 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h +++ 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p384.h @@ -53,24 +53,24 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE384_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE384_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = T[5] = T[6] = T[7] = get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE384_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE384_)(const U64 T[]) { //U64 Z = or64(or64(or64(or64(or64(or64(or64(T[0], T[1]), T[2]), T[3]), T[4]), T[5]), T[6]), T[7]); U64 Z = or64(or64(or64(T[0], T[1]), or64(T[2], T[3])), or64(or64(T[4], T[5]), or64(T[6], T[7]))); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = a[1]; @@ -83,7 +83,8 @@ __INLINE void MB_FUNC_NAME(mov_FE384_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? 
A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -95,7 +96,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE384_)(U64 R[], const U64 B[], __mb_mask k, R[7] = mask_mov64(B[7], k, A[7]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -107,7 +108,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE384_)(U64 R[], U64 B[], __mb_mask k R[7] = select64(k, B[7], (U64*)(&A[7])); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -134,7 +135,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE384_)(const U64 A[], const U64 B[]) } /* compare two FE */ -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE384_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE384_)(const U64 A[], const U64 B[]) { U64 T[P384_LEN52]; T[0] = xor64(A[0], B[0]); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h index 3caee065..1426a982 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_arith_p521.h @@ -56,23 +56,23 @@ static const int64u VMASK52[sizeof(U64)/sizeof(int64u)] = { /* set FE to zero */ -__INLINE void MB_FUNC_NAME(zero_FE521_)(U64 T[]) +__MBX_INLINE void MB_FUNC_NAME(zero_FE521_)(U64 T[]) { T[0] = T[1] = T[2] = T[3] = T[4] = T[5] = T[6] = T[7] = T[8] = T[9] = T[10] = 
get_zero64(); } /* check if FE is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_FE521_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_FE521_)(const U64 T[]) { U64 Z = or64(or64(or64(or64(T[0], T[1]), or64(T[2], T[3])), or64(or64(T[4], T[5]), or64(T[6], T[7]))), or64(or64(T[8], T[9]), T[10])); return cmpeq64_mask(Z, get_zero64()); } -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } /* move field element */ -__INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) +__MBX_INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) { r[0] = a[0]; r[1] = a[1]; @@ -88,7 +88,8 @@ __INLINE void MB_FUNC_NAME(mov_FE521_)(U64 r[], const U64 a[]) } /* move coordinate using mask: R = k? A : B */ -__INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, const U64 A[]) { R[0] = mask_mov64(B[0], k, A[0]); R[1] = mask_mov64(B[1], k, A[1]); @@ -103,7 +104,7 @@ __INLINE void MB_FUNC_NAME(mask_mov_FE521_)(U64 R[], const U64 B[], __mb_mask k, R[10]= mask_mov64(B[10],k, A[10]); } -__INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) +__MBX_INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k, const U64 A[]) { R[0] = select64(k, B[0], (U64*)(&A[0])); R[1] = select64(k, B[1], (U64*)(&A[1])); @@ -118,7 +119,7 @@ __INLINE void MB_FUNC_NAME(secure_mask_mov_FE521_)(U64 R[], U64 B[], __mb_mask k R[10]= select64(k,B[10], (U64*)(&A[10])); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) { /* r = a - b */ U64 r0 = sub64(A[0], B[0]); @@ -149,7 +150,7 @@ __INLINE __mb_mask MB_FUNC_NAME(cmp_lt_FE521_)(const U64 A[], const U64 B[]) return cmp64_mask(r10, 
get_zero64(), _MM_CMPINT_LT); } -__INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE521_)(const U64 A[], const U64 B[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(cmp_eq_FE521_)(const U64 A[], const U64 B[]) { U64 T[P521_LEN52]; diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h index 4b5f37e7..99d8ff3e 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p256.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE256_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P256_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -76,7 +76,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P256_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P256_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P256_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h index 9d224750..180e497d 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h +++ b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p384.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE384_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = r->X[5] = r->X[6] = r->X[7] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = r->Y[5] = r->Y[6] = r->Y[7] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P384_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -85,7 +85,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P384_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P384_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P384_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = r->x[5] = r->x[6] = r->x[7] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = r->y[5] = r->y[6] = r->y[7] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h index c6a3d71e..66b5eb0a 100644 --- a/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h +++ 
b/sources/ippcp/crypto_mb/include/internal/ecnist/ifma_ecpoint_p521.h @@ -38,13 +38,13 @@ typedef struct { /* check if coordinate is zero */ -__INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) +__MBX_INLINE __mb_mask MB_FUNC_NAME(is_zero_point_cordinate_)(const U64 T[]) { return MB_FUNC_NAME(is_zero_FE521_)(T); } /* set point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) { r->X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = r->X[5] = r->X[6] = r->X[7] = r->X[8] = r->X[9] = r->X[10] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = r->Y[5] = r->Y[6] = r->Y[7] = r->Y[8] = r->Y[9] = r->Y[10] = get_zero64(); @@ -52,7 +52,7 @@ __INLINE void MB_FUNC_NAME(set_point_to_infinity_)(P521_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -94,7 +94,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(P521_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P521_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(P521_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = r->x[5] = r->x[6] = r->x[7] = r->x[8] = r->x[9] = r->x[10] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = r->y[5] = r->y[6] = r->y[7] = r->y[8] = r->y[9] = r->y[10] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h index a56ad98d..f28aa980 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_ed25519.h @@ -72,7 +72,7 @@ 
typedef struct ge52_cached_mb_t { */ /* ext => homo */ -__INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) +__MBX_INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) { fe52_copy_mb(r->X, p->X); fe52_copy_mb(r->Y, p->Y); @@ -80,7 +80,7 @@ __INLINE void ge52_ext_to_homo_mb(ge52_homo_mb*r, const ge52_ext_mb* p) } /* p1p1 => homo */ -__INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) +__MBX_INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) { fe52_mul(r->X, p->X, p->T); fe52_mul(r->Y, p->Y, p->Z); @@ -88,7 +88,7 @@ __INLINE void ge52_p1p1_to_homo_mb(ge52_homo_mb *r, const ge52_p1p1_mb *p) } /* p1p1 => ext */ -__INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) +__MBX_INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) { fe52_mul(r->X, p->X, p->T); fe52_mul(r->Y, p->Y, p->Z); @@ -98,26 +98,26 @@ __INLINE void ge52_p1p1_to_ext_mb(ge52_ext_mb *r, const ge52_p1p1_mb *p) /* set GE to neutral */ -__INLINE void neutral_ge52_homo_mb(ge52_homo_mb* ge) +__MBX_INLINE void neutral_ge52_homo_mb(ge52_homo_mb* ge) { fe52_0_mb(ge->X); fe52_1_mb(ge->Y); fe52_1_mb(ge->Z); } -__INLINE void neutral_ge52_ext_mb(ge52_ext_mb* ge) +__MBX_INLINE void neutral_ge52_ext_mb(ge52_ext_mb* ge) { fe52_0_mb(ge->X); fe52_1_mb(ge->Y); fe52_0_mb(ge->T); fe52_1_mb(ge->Z); } -__INLINE void neutral_ge52_precomp_mb(ge52_precomp_mb *ge) +__MBX_INLINE void neutral_ge52_precomp_mb(ge52_precomp_mb *ge) { fe52_1_mb(ge->ysubx); fe52_1_mb(ge->yaddx); fe52_0_mb(ge->t2d); } -__INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) +__MBX_INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) { fe52_1_mb(ge->YsubX); fe52_1_mb(ge->YaddX); @@ -126,19 +126,19 @@ __INLINE void neutral_ge52_cached_mb(ge52_cached_mb* ge) } /* move GE under mask (conditionally): r = k? 
a : b */ -__INLINE void ge52_cmov1_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp* a) +__MBX_INLINE void ge52_cmov1_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp* a) { fe52_cmov1_mb(r->ysubx, b->ysubx, k, a->ysubx); fe52_cmov1_mb(r->yaddx, b->yaddx, k, a->yaddx); fe52_cmov1_mb(r->t2d, b->t2d, k, a->t2d); } -__INLINE void cmov_ge52_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp_mb* a) +__MBX_INLINE void cmov_ge52_precomp_mb(ge52_precomp_mb* r, const ge52_precomp_mb* b, __mb_mask k, const ge52_precomp_mb* a) { fe52_cmov_mb(r->ysubx, b->ysubx, k, a->ysubx); fe52_cmov_mb(r->yaddx, b->yaddx, k, a->yaddx); fe52_cmov_mb(r->t2d, b->t2d, k, a->t2d); } -__INLINE void cmov_ge52_cached_mb(ge52_cached_mb* r, const ge52_cached_mb* b, __mb_mask k, const ge52_cached_mb* a) +__MBX_INLINE void cmov_ge52_cached_mb(ge52_cached_mb* r, const ge52_cached_mb* b, __mb_mask k, const ge52_cached_mb* a) { fe52_cmov_mb(r->YsubX, b->YsubX, k, a->YsubX); fe52_cmov_mb(r->YaddX, b->YaddX, k, a->YaddX); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h index e8da4be2..10d7f9a5 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/ifma_arith_p25519.h @@ -47,19 +47,19 @@ typedef U64 fe52_mb[FE_LEN52]; /* set FE to zero */ -__INLINE void fe52_0_mb(fe52_mb fe) +__MBX_INLINE void fe52_0_mb(fe52_mb fe) { fe[0] = fe[1] = fe[2] = fe[3] = fe[4] = get_zero64(); } /* set FE to 1 */ -__INLINE void fe52_1_mb(fe52_mb fe) +__MBX_INLINE void fe52_1_mb(fe52_mb fe) { fe[0] = set1(1LL); fe[1] = fe[2] = fe[3] = fe[4] = get_zero64(); } /* copy FE */ -__INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) +__MBX_INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) { r[0] = a[0]; r[1] = a[1]; @@ -69,7 +69,7 @@ 
__INLINE void fe52_copy_mb(fe52_mb r, const fe52_mb a) } /* convert fe52_mb => fe64_mb */ -__INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) +__MBX_INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) { r[0] = xor64(slli64(a[1],52), a[0]); r[1] = xor64(slli64(a[2],40), srli64(a[1],12)); @@ -78,14 +78,14 @@ __INLINE void fe52_to_fe64_mb(fe64_mb r, const fe52_mb a) } /* check if FE is zero */ -__INLINE __mb_mask fe52_mb_is_zero(const fe52_mb a) +__MBX_INLINE __mb_mask fe52_mb_is_zero(const fe52_mb a) { U64 t = or64(or64(a[0], a[1]), or64(or64(a[2], a[3]), a[4])); return cmpeq64_mask(t, get_zero64()); } /* check if a==b */ -__INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) +__MBX_INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) { __ALIGN64 fe52_mb t; t[0] = xor64(a[0], b[0]); @@ -97,7 +97,7 @@ __INLINE __mb_mask fe52_mb_is_equ(const fe52_mb a, const fe52_mb b) } /* move FE under mask (conditionally): r = k? a : b */ -__INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 a) +__MBX_INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 a) { r[0] = mask_mov64(b[0], k, set1(a[0])); r[1] = mask_mov64(b[1], k, set1(a[1])); @@ -105,7 +105,8 @@ __INLINE void fe52_cmov1_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52 r[3] = mask_mov64(b[3], k, set1(a[3])); r[4] = mask_mov64(b[4], k, set1(a[4])); } -__INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_mb a) +OPTIMIZE_OFF_VS19 +__MBX_INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_mb a) { r[0] = mask_mov64(b[0], k, a[0]); r[1] = mask_mov64(b[1], k, a[1]); @@ -115,13 +116,13 @@ __INLINE void fe52_cmov_mb(fe52_mb r, const fe52_mb b, __mb_mask k, const fe52_m } /* swap FE under mask (conditionally): r = k? 
a : b */ -__INLINE void cswap_U64(U64* x, __mb_mask k, U64* y) +__MBX_INLINE void cswap_U64(U64* x, __mb_mask k, U64* y) { *x = _mm512_mask_xor_epi64(*x, k, *x, *y); *y = _mm512_mask_xor_epi64(*y, k, *y, *x); *x = _mm512_mask_xor_epi64(*x, k, *x, *y); } -__INLINE void fe52_cswap_mb(fe52_mb a, __mb_mask k, fe52_mb b) +__MBX_INLINE void fe52_cswap_mb(fe52_mb a, __mb_mask k, fe52_mb b) { cswap_U64(&a[0], k, &b[0]); cswap_U64(&a[1], k, &b[1]); diff --git a/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h b/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h index 8c4bfc4a..795c253d 100644 --- a/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h +++ b/sources/ippcp/crypto_mb/include/internal/ed25519/sha512.h @@ -67,7 +67,7 @@ static __ALIGN64 const int64u sha512_cnt[] = { #define LSR32(x,nBits) ((x)>>(nBits)) #define LSL32(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of WORD */ +/* Rotate (right and left) of WORD */ #if defined(_MSC_VER) && !defined( __ICL ) # include # define ROR32(x, nBits) _lrotr((x),(nBits)) @@ -81,7 +81,7 @@ static __ALIGN64 const int64u sha512_cnt[] = { #define LSR64(x,nBits) ((x)>>(nBits)) #define LSL64(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of DWORD */ +/* Rotate (right and left) of DWORD */ #define ROR64(x, nBits) (LSR64((x),(nBits)) | LSL64((x),64-(nBits))) #define ROL64(x, nBits) ROR64((x),(64-(nBits))) diff --git a/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h b/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h index d774e317..a4f36e80 100644 --- a/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h +++ b/sources/ippcp/crypto_mb/include/internal/exp/ifma_exp_method.h @@ -19,7 +19,7 @@ #include -/* exponetiation processing window */ +/* exponentiation processing window */ #define EXP_WIN_SIZE (5) #define EXP_WIN_MASK ((1<X[0] = r->X[1] = r->X[2] = r->X[3] = r->X[4] = get_zero64(); r->Y[0] = r->Y[1] = r->Y[2] = r->Y[3] = r->Y[4] = get_zero64(); @@ -50,7 +50,7 @@ 
__INLINE void MB_FUNC_NAME(set_point_to_infinity_)(SM2_POINT* r) } /* set point to infinity by mask */ -__INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask mask) +__MBX_INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask mask) { U64 zeros = get_zero64(); @@ -74,7 +74,7 @@ __INLINE void MB_FUNC_NAME(mask_set_point_to_infinity_)(SM2_POINT* r, __mb_mask } /* set affine point to infinity */ -__INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(SM2_POINT_AFFINE* r) +__MBX_INLINE void MB_FUNC_NAME(set_point_affine_to_infinity_)(SM2_POINT_AFFINE* r) { r->x[0] = r->x[1] = r->x[2] = r->x[3] = r->x[4] = get_zero64(); r->y[0] = r->y[1] = r->y[2] = r->y[3] = r->y[4] = get_zero64(); diff --git a/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h b/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h index 1dbe59fc..f0b0f408 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h +++ b/sources/ippcp/crypto_mb/include/internal/sm3/sm3_common.h @@ -46,7 +46,7 @@ #define HASH_BUFF(ctx) ((ctx)->msg_buffer) /* -// constants +// constants */ static const int32u sm3_iv[] = { 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, @@ -65,11 +65,11 @@ __ALIGN64 static const int32u tj_calculated[] = { 0x79CC4519,0xF3988A32,0xE73114 0x879D8A7A,0x0F3B14F5,0x1E7629EA,0x3CEC53D4,0x79D8A7A8,0xF3B14F50,0xE7629EA1,0xCEC53D43 }; /* -// internal functions +// internal functions */ -__INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) +__MBX_INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) { int8u* d = (int8u*)dst_p; int k; @@ -77,7 +77,7 @@ __INLINE void pad_block(int8u padding_byte, void* dst_p, int num_bytes) d[k] = padding_byte; } -__INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3, +__MBX_INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3, __m256i *v4, __m256i *v5, __m256i *v6, __m256i *v7) { __m256i w0, w1, w2, w3, 
w4, w5, w6, w7; @@ -134,7 +134,7 @@ __INLINE void TRANSPOSE_8X8_I32(__m256i *v0, __m256i *v1, __m256i *v2, __m256i * *v7 = _mm256_permute2x128_si256(x3, x7, 0b110001); } -__INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], __mmask16 mb_mask) { +__MBX_INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], __mmask16 mb_mask) { __m256i v0 = _mm256_loadu_si256((__m256i*)inp[0]); __m256i v1 = _mm256_loadu_si256((__m256i*)inp[1]); __m256i v2 = _mm256_loadu_si256((__m256i*)inp[2]); @@ -158,7 +158,7 @@ __INLINE void MASK_TRANSPOSE_8X8_I32(int32u* out[8], const int32u* const inp[8], } -__INLINE void TRANSPOSE_8X16_I32(int32u* out[16], const int32u* const inp[8], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_8X16_I32(int32u* out[16], const int32u* const inp[8], __mmask16 mb_mask) { __m256i v0 = _mm256_loadu_si256((__m256i*)inp[0]); __m256i v1 = _mm256_loadu_si256((__m256i*)inp[1]); __m256i v2 = _mm256_loadu_si256((__m256i*)inp[2]); diff --git a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h index e1d18aca..e826715a 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h +++ b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_gcm_mb.h @@ -22,12 +22,12 @@ #define SM4_GCM_MB_H ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/* -// Constant from NIST Special Publication 800-38D +/* +// Constant from NIST Special Publication 800-38D // (Recommendation for GCMmode, p.5.2.1.1 Input Data) // len(P) <= 2^39-256 bits */ -static const int64u MAX_TXT_LEN = ((int64u)1 << 36) - 32; // length in bytes +static const int64u MAX_TXT_LEN = ((int64u)1 << 36) - 32; // length in bytes /* // Internal functions @@ -167,7 +167,7 @@ static const int rearrangeOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, to[14] = from[11]; \ to[15] = from[15]; -__INLINE __m512i inc_block32(__m512i x, 
const int8u *increment) { return mask_add_epi32(x, 0x1111, x, M512(increment)); } +__MBX_INLINE __m512i inc_block32(__m512i x, const int8u *increment) { return mask_add_epi32(x, 0x1111, x, M512(increment)); } static __ALIGN64 const int8u initialInc[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; diff --git a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h index 99cd25a2..d080e03f 100644 --- a/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h +++ b/sources/ippcp/crypto_mb/include/internal/sm4/sm4_mb.h @@ -320,25 +320,25 @@ EXTERN_C void sm4_xts_kernel_mb16(int8u* pa_out[SM4_LINES], const int8u* pa_inp[ const int8u* pa_tweak[SM4_LINES], __mmask16 mb_mask, const int dir); // The transformation based on SM4 sbox algebraic structure, parameters were computed manually -__INLINE __m512i sBox512(__m512i block) +__MBX_INLINE __m512i sBox512(__m512i block) { block = _mm512_gf2p8affine_epi64_epi8(block, M512(affineIn), 0x65); block = _mm512_gf2p8affineinv_epi64_epi8(block, M512(affineOut), 0xd3); return block; } -__INLINE __m512i Lblock512(__m512i x) +__MBX_INLINE __m512i Lblock512(__m512i x) { return _mm512_ternarylogic_epi32(_mm512_xor_si512(_mm512_rol_epi32(x, 2), _mm512_rol_epi32(x, 10)), _mm512_rol_epi32(x, 18), _mm512_shuffle_epi8 (x, _mm512_loadu_si512(shuf8)), 0x96); } -__INLINE __m512i Lkey512(__m512i x) +__MBX_INLINE __m512i Lkey512(__m512i x) { return _mm512_xor_epi32(_mm512_rol_epi32(x, 13), _mm512_rol_epi32(x, 23)); } -__INLINE __m512i IncBlock512(__m512i x, const int8u* increment) +__MBX_INLINE __m512i IncBlock512(__m512i x, const int8u* increment) { __m512i t = _mm512_add_epi64(x, M512(increment)); __mmask8 carryMask = _mm512_cmplt_epu64_mask(t, x); @@ -472,7 +472,7 @@ __INLINE __m512i IncBlock512(__m512i x, const int8u* increment) \ 
T0=K0,T1=K1,T2=K2,T3=K3 -__INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const int8u* p_inp[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const int8u* p_inp[16], __mmask16 mb_mask) { __mmask16 loc_mb_mask = mb_mask; // L0 - L3 @@ -510,7 +510,7 @@ __INLINE void TRANSPOSE_16x4_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __ *t3 = _mm512_unpackhi_epi64(z1, z3); } -__INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const __m128i in[16]) { +__MBX_INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, const __m128i in[16]) { // L0 - L3 __m512i z0 = _mm512_castsi128_si512(in[0]); __m512i z1 = _mm512_castsi128_si512(in[1]); @@ -546,7 +546,7 @@ __INLINE void TRANSPOSE_16x4_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2 *t3 = _mm512_unpackhi_epi64(z1, z3); } -__INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], __mmask16 mb_mask) { #define STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi32(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -591,7 +591,7 @@ __INLINE void TRANSPOSE_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __ } -__INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i out[16]) { +__MBX_INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i out[16]) { __m512i z0 = _mm512_unpacklo_epi32(*t0, *t1); __m512i z1 = _mm512_unpackhi_epi32(*t0, *t1); @@ -630,7 +630,7 @@ __INLINE void TRANSPOSE_4x16_I32_XMM_EPI32(__m512i* t0, __m512i* t1, __m512i* t2 } -__INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i 
p_out[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, __m128i p_out[16], __mmask16 mb_mask) { #define STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi32(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -675,7 +675,7 @@ __INLINE void TRANSPOSE_4x16_I32_O128_EPI32(__m512i* t0, __m512i* t1, __m512i* t } -__INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], int* p_loc_len, __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], int* p_loc_len, __mmask16 mb_mask) { #define STORE_RESULT_EPI8(OUT, store_mask, loc_mb_mask, Ti) \ _mm512_mask_storeu_epi8(OUT, store_mask * (0x1&loc_mb_mask), Ti); \ @@ -737,7 +737,7 @@ __INLINE void TRANSPOSE_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512 STORE_RESULT_EPI8((__m128i*)p_out[15] - 3, stream_mask << 48, loc_mb_mask, t3); } -__INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], const int8u* p_iv[16], __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i* t2, __m512i* t3, int8u* p_out[16], const int8u* p_iv[16], __mmask16 mb_mask) { #define XOR_AND_STORE_RESULT(OUT, store_mask, loc_mb_mask, Ti, IV, TMP) \ TMP = _mm512_maskz_loadu_epi32(store_mask * (0x1&loc_mb_mask), IV); \ @@ -787,7 +787,7 @@ __INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI32(__m512i* t0, __m512i* t1, __m512i XOR_AND_STORE_RESULT((__m128i*)p_out[15] - 3, 0xF000, loc_mb_mask, *t3, (__m128i*)p_iv[15] - 3, z3); } -__INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], const int8u* p_iv[16], int* p_loc_len, __mmask16 mb_mask) { +__MBX_INLINE void TRANSPOSE_AND_XOR_4x16_I32_EPI8(__m512i t0, __m512i t1, __m512i t2, __m512i t3, int8u* p_out[16], const int8u* p_iv[16], int* 
p_loc_len, __mmask16 mb_mask) { #define XOR_AND_STORE_RESULT_EPI8(OUT, store_mask, loc_mb_mask, Ti, IV, TMP) \ TMP = _mm512_maskz_loadu_epi8(store_mask * (0x1&loc_mb_mask), IV); \ diff --git a/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake b/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake new file mode 100644 index 00000000..312f7466 --- /dev/null +++ b/sources/ippcp/crypto_mb/src/cmake/windows/IntelLLVM.cmake @@ -0,0 +1,94 @@ +#=============================================================================== +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +#=============================================================================== + +# Security Linker flags + +set(LINK_FLAG_SECURITY "") + +# Specifies whether to generate an executable image that can be randomly rebased at load time. +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /DYNAMICBASE") +# This option modifies the header of an executable image, a .dll file or .exe file, to indicate whether ASLR with 64-bit addresses is supported. +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /HIGHENTROPYVA") +# The /LARGEADDRESSAWARE option tells the linker that the application can handle addresses larger than 2 gigabytes. 
+set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /LARGEADDRESSAWARE") +# Indicates that an executable is compatible with the Windows Data Execution Prevention (DEP) feature +set(LINK_FLAG_SECURITY "${LINK_FLAG_SECURITY} /NXCOMPAT") + +# Security Compiler flags + +set(CMAKE_C_FLAGS_SECURITY "") +# Detect some buffer overruns. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /GS") +# Warning level = 3 +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /W3") +# Changes all warnings to errors. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /WX") +# Enable Intel® Control-Flow Enforcement Technology (Intel® CET) protection +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} -fcf-protection:full") +# Changes all warnings to errors. +set(CMAKE_C_FLAGS_SECURITY "${CMAKE_C_FLAGS_SECURITY} /WX") + +# Linker flags + +# Add export files +if(MBX_FIPS_MODE) + set(LINK_FLAGS_DYNAMIC "/DEF:${CRYPTO_MB_SOURCES_DIR}/cmake/dll_export/crypto_mb_fips_selftests.defs") +else() + set(LINK_FLAGS_DYNAMIC "/DEF:${CRYPTO_MB_SOURCES_DIR}/cmake/dll_export/crypto_mb.defs") +endif() + +# Disables linking to Intel® libraries +set(LINK_FLAG_DYNAMIC_WINDOWS "${LINK_FLAG_DYNAMIC_WINDOWS} /Qno-intel-lib") + +# Compiler flags + +# Tells the compiler to align functions and loops +# set(CMAKE_C_FLAGS "/Qfnalign:32 /Qalign-loops:32") +# Suppress warning #10120: overriding '/O2' with '/O3' +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd10120 -Wno-unused-command-line-argument -Wno-unused-parameter -Wno-pointer-sign -Wno-sign-compare -Wno-static-in-inline /Qno-intel-lib") +# Ensures that compilation takes place in a freestanding environment +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qfreestanding") + +if(CODE_COVERAGE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qprof-gen:srcpos /Qprof-dir:${PROF_DATA_DIR}") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}") + +# Tells the compiler to conform to a specific language standard. 
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99") + +# Causes the application to use the multithread, static version of the run-time library +set(CMAKE_C_FLAGS_RELEASE "/MT") +# Optimization level = 3 +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /O3") +# No-debug macro +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /DNDEBUG") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + +# Causes the application to use the multithread, static version of the run-time library (debug version). +set(CMAKE_C_FLAGS_DEBUG "/MTd") +# The /Zi option produces a separate PDB file that contains all the symbolic debugging information for use with the debugger. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Zi") +# Turns off all optimizations. +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Od") +# Debug macro +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /D_DEBUG") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") + +# Optimisation dependent flags +set(AVX512_CFLAGS "-march=icelake-server -mavx512dq -mavx512ifma -mavx512f -mavx512vbmi2 -mavx512cd -mavx512bw -mbmi2") diff --git a/sources/ippcp/crypto_mb/src/common/cpu_features.c b/sources/ippcp/crypto_mb/src/common/cpu_features.c index ead6995b..d3e0fb53 100644 --- a/sources/ippcp/crypto_mb/src/common/cpu_features.c +++ b/sources/ippcp/crypto_mb/src/common/cpu_features.c @@ -14,6 +14,9 @@ * limitations under the License. 
*************************************************************************/ +#if defined( _WIN32 ) || defined( _WIN64 ) +#include +#endif #include #include @@ -58,7 +61,7 @@ #define edx_ (3) -__INLINE void _mbcp_cpuid(int32u buf[4], int32u leaf, int32u subleaf) +__MBX_INLINE void _mbcp_cpuid(int32u buf[4], int32u leaf, int32u subleaf) { #ifdef __GNUC__ __asm__ ("cpuid" : "=a" (buf[0]), "=b" (buf[1]), "=c" (buf[2]), "=d" (buf[3]) : "a" (leaf), "c" (subleaf)); diff --git a/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen b/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen index 3fc35cb9..5d918721 100644 --- a/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen +++ b/sources/ippcp/crypto_mb/src/common/crypto_mb_res.gen @@ -38,7 +38,7 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "CompanyName", "Intel Corporation.\0" - VALUE "FileVersion", STR( MBX_VERSION() ) "\0" + VALUE "FileVersion", STR_FILE_MBX_VERSION() "\0" VALUE "ProductName", MBX_LIB_SHORTNAME() ". Intel(R) Integrated Performance Primitives. " MBX_LIB_LONGNAME() ".\0" VALUE "ProductVersion", CRYPTO_MB_STR_VERSION() "\0" VALUE "LegalCopyright", "Copyright (C) 1999-2021, Intel Corporation. All rights reserved.\0" diff --git a/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c b/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c index e6db178c..8898ef79 100644 --- a/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c +++ b/sources/ippcp/crypto_mb/src/common/ifma_cvt52.c @@ -33,11 +33,11 @@ #define MIN(a, b) ( ((a) < (b)) ? a : b ) -__INLINE __mmask8 MB_MASK(int L) { +__MBX_INLINE __mmask8 MB_MASK(int L) { return (L > 0) ? 
(__mmask8)0xFF : (__mmask8)0; } -__INLINE __mmask64 SB_MASK1(int L, int REV) +__MBX_INLINE __mmask64 SB_MASK1(int L, int REV) { if (L <= 0) return (__mmask64)0x0; @@ -65,7 +65,7 @@ __INLINE __mmask64 SB_MASK1(int L, int REV) // - 8 hex strings -> mb8 */ DISABLE_OPTIMIZATION -__INLINE void transform_8sb_to_mb8(U64 out_mb8[], int bitLen, int8u *inp[8], int inpLen[8], int flag) { +__MBX_INLINE void transform_8sb_to_mb8(U64 out_mb8[], int bitLen, int8u *inp[8], int inpLen[8], int flag) { // inverse bytes (reverse=1) const __m512i bswap_mask = _mm512_set_epi64( 0x0001020304050607, 0x08090a0b0c0d0e0f, @@ -254,7 +254,7 @@ int8u ifma_HexStr8_to_mb8(int64u out_mb8[][8], const int8u* const pStr[8], int b // - mb8 -> 8 hex strings */ DISABLE_OPTIMIZATION -__INLINE void transform_mb8_to_8sb(int8u* out[8], int outLen[8], const U64 inp_mb8[], int bitLen, int flag) +__MBX_INLINE void transform_mb8_to_8sb(int8u* out[8], int outLen[8], const U64 inp_mb8[], int bitLen, int flag) { // inverse bytes (reverse=1) const __m512i bswap_mask = _mm512_set_epi64( diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c index 3752da68..38a5de23 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n384.c @@ -310,7 +310,7 @@ void MB_FUNC_NAME(ifma_frommont52_n384_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_n384_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) { fe52_sqr(r,base); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c index c92a2d6e..37b3b6f0 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_n521.c @@ -237,7 +237,7 @@ void MB_FUNC_NAME(ifma_ams52_n521_)(U64 r[], const U64 va[]) U64 r20, 
r21; U64 u; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19 = r20 = r21 = get_zero64(); // full square @@ -399,7 +399,7 @@ void MB_FUNC_NAME(ifma_frommont52_n521_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_n521_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) { fe52_sqr(r,base); @@ -504,7 +504,7 @@ void MB_FUNC_NAME(ifma_aminv52_n521_)(U64 r[], const U64 z[]) Specialized single operations over n521 add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c index 20391df5..57232dc5 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p384.c @@ -332,7 +332,7 @@ void MB_FUNC_NAME(ifma_frommont52_p384_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_p384_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) MB_FUNC_NAME(mov_FE384_)(r, base); @@ -402,7 +402,7 @@ void MB_FUNC_NAME(ifma_aminv52_p384_)(U64 r[], const U64 z[]) Specialized single operations over p384: add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask 
MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c index 28c7979a..ea6896f9 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_arith_p521.c @@ -99,7 +99,7 @@ void MB_FUNC_NAME(ifma_amm52_p521_)(U64 r[], const U64 va[], const U64 vb[]) U64 r10, r11, r12, r13, r14, r15, r16, r17, r18, r19; U64 r20, r21; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19= r20 = r21 = get_zero64(); @@ -282,7 +282,7 @@ void MB_FUNC_NAME(ifma_ams52_p521_)(U64 r[], const U64 va[]) U64 r10, r11, r12, r13, r14, r15, r16, r17, r18, r19; U64 r20, r21; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = r16 = r17 = r18 = r19 = r20 = r21 = get_zero64(); // full square @@ -486,7 +486,7 @@ void MB_FUNC_NAME(ifma_frommont52_p521_)(U64 r[], const U64 a[]) #define fe52_mul MB_FUNC_NAME(ifma_amm52_p521_) /* r = base^(2^n) */ -__INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) +__MBX_INLINE void fe52_sqr_pwr(U64 r[], const U64 base[], int n) { if(r!=base) MB_FUNC_NAME(mov_FE521_)(r, base); @@ -574,7 +574,7 @@ void MB_FUNC_NAME(ifma_aminv52_p521_)(U64 r[], const U64 z[]) Specialized single operations over p521: add, sub, neg =====================================================================*/ -__INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) +__MBX_INLINE __mb_mask MB_FUNC_NAME(lt_mbx_digit_)(const U64 a, const U64 b, const __mb_mask lt_mask) { U64 d = mask_sub64(sub64(a, b), lt_mask, sub64(a, b), set1(1)); return 
cmp64_mask(d, get_zero64(), _MM_CMPINT_LT); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c index 184fa35a..43a982fe 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p256.c @@ -57,7 +57,7 @@ static void nistp256_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n256 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp256_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -180,7 +180,7 @@ static __mb_mask nistp256_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ mbx_status mbx_nistp256_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch 
buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp256_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -436,7 +436,7 @@ mbx_status mbx_nistp256_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE256_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE256_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P256_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P256_BITSIZE); @@ -447,7 +447,7 @@ mbx_status mbx_nistp256_ecdsa_sign_mb8(int8u* pa_sign_r[8], /* // Verifies ECDSA signature -// pa_sign_r[] array of pointers to the computed r-components of the signatures +// pa_sign_r[] array of pointers to the computed r-components of the signatures // pa_sign_s[] array of pointers to the computed s-components of the signatures // pa_msg[] array of pointers to the messages are being signed // pa_pubx[] array of pointers to the public keys X-coordinates @@ -460,7 +460,7 @@ mbx_status mbx_nistp256_ecdsa_verify_mb8(const int8u* const pa_sign_r[8], const int8u* const pa_msg[8], const int64u* const pa_pubx[8], const int64u* const pa_puby[8], - const int64u* const pa_pubz[8], + const int64u* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -605,7 +605,7 @@ mbx_status mbx_nistp256_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], MB_FUNC_NAME(zero_)((int64u (*)[8])T, sizeof(T)/sizeof(U64)); return status; } - + nistp256_ecdsa_inv_keys_mb8(T, T, 0); /* store results in suitable format */ ifma_mb8_to_BN_256(pa_inv_skey, (const int64u (*)[8])T); @@ -643,7 +643,7 @@ mbx_status mbx_nistp256_ecdsa_sign_complete_ssl_mb8(int8u* 
pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -785,7 +785,7 @@ mbx_status mbx_nistp256_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P256_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P256_BITSIZE); @@ -802,7 +802,7 @@ mbx_status mbx_nistp256_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], - const BIGNUM* const pa_pubz[8], + const BIGNUM* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -840,7 +840,7 @@ mbx_status mbx_nistp256_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], { if(pa_sig[buf_no] != NULL) { - ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, + ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, (const BIGNUM(**))pa_sign_s + buf_no); } } diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c index 165f80ad..e0548c8e 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p384.c @@ -57,7 +57,7 @@ static void nistp384_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n384 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp384_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -180,7 +180,7 @@ static __mb_mask 
nistp384_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ mbx_status mbx_nistp384_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp384_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -417,7 +417,7 @@ mbx_status mbx_nistp384_ecdsa_sign_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp384_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp384_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); 
@@ -435,7 +435,7 @@ mbx_status mbx_nistp384_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE384_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE384_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P384_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P384_BITSIZE); @@ -629,7 +629,7 @@ mbx_status mbx_nistp384_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], /* store results in suitable format */ ifma_mb8_to_BN_384(pa_sign_rp, (const int64u (*)[8])T); - + status |= MBX_SET_STS_BY_MASK(status, stt_mask, MBX_STATUS_SIGNATURE_ERR); return status; } @@ -647,7 +647,7 @@ mbx_status mbx_nistp384_ecdsa_sign_complete_ssl_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -775,7 +775,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp384_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp384_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -789,7 +789,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P384_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P384_BITSIZE); @@ -802,7 +802,7 @@ mbx_status mbx_nistp384_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], } DLL_PUBLIC -mbx_status mbx_nistp384_ecdsa_verify_ssl_mb8(const 
ECDSA_SIG* const pa_sig[8], +mbx_status mbx_nistp384_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c index bc54ec89..77404111 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecdsa_p521.c @@ -57,7 +57,7 @@ static void nistp521_ecdsa_inv_keys_mb8(U64 inv_skey[], // r = ([skey]*G).x mod n521 // // note: pay attention on skey[] presenttaion -// it should be transposed and zero expanded +// it should be transposed and zero expanded */ static __mb_mask nistp521_ecdsa_sign_r_mb8(U64 sign_r[], const U64 skey[], @@ -135,7 +135,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], /* h2 = sign_r * h */ MB_FUNC_NAME(ifma_tomont52_n521_)(h2, sign_r); MB_FUNC_NAME(ifma_amm52_n521_)(h2, h2, sign_s); - MB_FUNC_NAME(ifma_frommont52_n521_)(h2,h2); + MB_FUNC_NAME(ifma_frommont52_n521_)(h2,h2); int64u tmp[8][P521_LEN64]; int64u* pa_tmp[8] = {tmp[0], tmp[1], tmp[2], tmp[3], @@ -159,7 +159,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], // P != 0 __mb_mask signature_err_mask = MB_FUNC_NAME(is_zero_point_cordinate_)(P.Z); - + /* sign_r_restored = P.X mod n */ __ALIGN64 U64 sign_r_restored[P521_LEN52]; MB_FUNC_NAME(get_nistp521_ec_affine_coords_)(sign_r_restored, NULL, &P); @@ -180,7 +180,7 @@ static __mb_mask nistp521_ecdsa_verify_mb8(U64 sign_r[], // pre-computation of ECDSA signature // // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys -// pa_sign_rp[] array of pointers to the r-components of the signatures +// pa_sign_rp[] array of pointers to the r-components of the signatures // pa_eph_skey[] array of pointers to the ephemeral (nonce) signer's ephemeral private keys // pBuffer pointer to the scratch buffer // @@ -256,10 +256,10 @@ 
mbx_status mbx_nistp521_ecdsa_sign_setup_mb8(int64u* pa_inv_eph_skey[8], /* // computes ECDSA signature // -// pa_sign_r[] array of pointers to the r-components of the signatures -// pa_sign_s[] array of pointers to the s-components of the signatures +// pa_sign_r[] array of pointers to the r-components of the signatures +// pa_sign_s[] array of pointers to the s-components of the signatures // pa_msg[] array of pointers to the messages are being signed -// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures +// pa_sign_rp[] array of pointers to the pre-computed r-components of the signatures // pa_inv_eph_skey[] array of pointers to the inversion of signer's ephemeral private keys // pa_reg_skey[] array of pointers to the regular signer's ephemeral (nonce) private keys // pBuffer pointer to the scratch buffer @@ -277,7 +277,7 @@ mbx_status mbx_nistp521_ecdsa_sign_complete_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -417,7 +417,7 @@ mbx_status mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp521_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp521_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -435,7 +435,7 @@ mbx_status mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], int8u stt_mask_r = MB_FUNC_NAME(is_zero_FE521_)(sign_r); int8u stt_mask_s = MB_FUNC_NAME(is_zero_FE521_)(sign_s); - /* convert singnature components to strings */ + /* convert signature components to strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P521_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P521_BITSIZE); @@ -446,7 +446,7 @@ mbx_status 
mbx_nistp521_ecdsa_sign_mb8(int8u* pa_sign_r[8], /* // Verifies ECDSA signature -// pa_sign_r[] array of pointers to the computed r-components of the signatures +// pa_sign_r[] array of pointers to the computed r-components of the signatures // pa_sign_s[] array of pointers to the computed s-components of the signatures // pa_msg[] array of pointers to the messages are being signed // pa_pubx[] array of pointers to the public keys X-coordinates @@ -460,7 +460,7 @@ mbx_status mbx_nistp521_ecdsa_verify_mb8(const int8u* const pa_sign_r[8], const int8u* const pa_msg[8], const int64u* const pa_pubx[8], const int64u* const pa_puby[8], - const int64u* const pa_pubz[8], + const int64u* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -625,7 +625,7 @@ mbx_status mbx_nistp521_ecdsa_sign_setup_ssl_mb8(BIGNUM* pa_inv_skey[8], /* store results in suitable format */ ifma_mb8_to_BN_521(pa_sign_rp, (const int64u (*)[8])T); - + status |= MBX_SET_STS_BY_MASK(status, stt_mask, MBX_STATUS_SIGNATURE_ERR); return status; } @@ -643,7 +643,7 @@ mbx_status mbx_nistp521_ecdsa_sign_complete_ssl_mb8(int8u* pa_sign_r[8], int buf_no; /* test input pointers */ - if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || + if(NULL==pa_sign_r || NULL==pa_sign_s || NULL==pa_msg || NULL==pa_sign_rp || NULL==pa_inv_eph_skey || NULL==pa_reg_skey) { status = MBX_SET_STS_ALL(MBX_STATUS_NULL_PARAM_ERR); return status; @@ -771,7 +771,7 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], return status; } - /* compute inversion */ + /* compute inversion */ nistp521_ecdsa_inv_keys_mb8(inv_eph_key, inv_eph_key, pBuffer); /* compute r-component */ nistp521_ecdsa_sign_r_mb8(sign_r, scalar, pBuffer); @@ -785,7 +785,7 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], /* clear copy of the regular secret keys */ MB_FUNC_NAME(zero_)((int64u (*)[8])reg_key, sizeof(reg_key)/sizeof(U64)); - /* convert singnature components to strings */ + /* convert signature components to 
strings */ ifma_mb8_to_HexStr8(pa_sign_r, (const int64u(*)[8])sign_r, P521_BITSIZE); ifma_mb8_to_HexStr8(pa_sign_s, (const int64u(*)[8])sign_s, P521_BITSIZE); @@ -799,10 +799,10 @@ mbx_status mbx_nistp521_ecdsa_sign_ssl_mb8(int8u* pa_sign_r[8], DLL_PUBLIC mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], - const int8u* const pa_msg[8], + const int8u* const pa_msg[8], const BIGNUM* const pa_pubx[8], const BIGNUM* const pa_puby[8], - const BIGNUM* const pa_pubz[8], + const BIGNUM* const pa_pubz[8], int8u* pBuffer) { mbx_status status = 0; @@ -840,7 +840,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], { if(pa_sig[buf_no] != NULL) { - ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, + ECDSA_SIG_get0(pa_sig[buf_no], (const BIGNUM(**))pa_sign_r + buf_no, (const BIGNUM(**))pa_sign_s + buf_no); } } @@ -858,7 +858,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_n521_)(sign_r), MBX_STATUS_MISMATCH_PARAM_ERR); status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_n521_)(sign_s), MBX_STATUS_MISMATCH_PARAM_ERR); - if(!MBX_IS_ANY_OK_STS(status)) + if(!MBX_IS_ANY_OK_STS(status)) return status; P521_POINT W; @@ -874,7 +874,7 @@ mbx_status mbx_nistp521_ecdsa_verify_ssl_mb8(const ECDSA_SIG* const pa_sig[8], status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_p521_)(W.Y), MBX_STATUS_MISMATCH_PARAM_ERR); status |= MBX_SET_STS_BY_MASK(status, MB_FUNC_NAME(ifma_check_range_p521_)(W.Z), MBX_STATUS_MISMATCH_PARAM_ERR); - if(!MBX_IS_ANY_OK_STS(status)) + if(!MBX_IS_ANY_OK_STS(status)) return status; __mb_mask signature_err_mask = nistp521_ecdsa_verify_mb8(sign_r,sign_s,msg, &W); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c index ecb00a06..0ef12318 100644 --- 
a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p256.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 : 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -330,7 +330,7 @@ static __NOINLINE void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -493,7 +493,7 @@ void MB_FUNC_NAME(ifma_ec_nistp256_mul_point_)(P256_POINT* r, const P256_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -509,7 +509,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P256_POINT_AFFINE* r, const SINGLE_P256_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P256_POINT_AFFINE* r, const SINGLE_P256_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c index 9ef01bff..a82d7ff8 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p384.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 : 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -329,7 +329,7 @@ static __NOINLINE 
void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -492,7 +492,7 @@ void MB_FUNC_NAME(ifma_ec_nistp384_mul_point_)(P384_POINT* r, const P384_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -508,7 +508,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SINGLE_P384_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SINGLE_P384_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); @@ -517,7 +517,7 @@ __INLINE void MB_FUNC_NAME(extract_point_affine_)(P384_POINT_AFFINE* r, const SI U64 ay0, ay1, ay2, ay3, ay4, ay5, ay6, ay7; /* assume the point at infinity is what need */ - ax0 = ax1 = ax2 = ax3 = ax4 = ax5 = ax6 = ax7= + ax0 = ax1 = ax2 = ax3 = ax4 = ax5 = ax6 = ax7= ay0 = ay1 = ay2 = ay3 = ay4 = ay5 = ay6 = ay7 = get_zero64(); /* find out what we actually need or just keep original infinity */ diff --git a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c index ab22335d..e7fc8d90 100644 --- a/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c +++ b/sources/ippcp/crypto_mb/src/ecnist/ifma_ecpoint_p521.c @@ -30,8 +30,8 @@ /* // Presentation of point at infinity: -// - projective (X : Y : 0) -// - affine (0 
: 0) +// - projective (X : Y : 0) +// - affine (0 : 0) */ /* @@ -333,7 +333,7 @@ static __NOINLINE void clear_secret_context(U64* wval, U64* dval, __mb_mask* dsi *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -496,7 +496,7 @@ void MB_FUNC_NAME(ifma_ec_nistp521_mul_point_)(P521_POINT* r, const P521_POINT* #define BP_WIN_SIZE MUL_BASEPOINT_WIN_SIZE /* defined in the header above */ -__INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); @@ -512,7 +512,7 @@ __INLINE void MB_FUNC_NAME(booth_recode_bp_)(__mb_mask* sign, U64* dvalue, U64 w } /* extract affine affine point */ -__INLINE void MB_FUNC_NAME(extract_point_affine_)(P521_POINT_AFFINE* r, const SINGLE_P521_POINT_AFFINE* tbl, U64 idx) +__MBX_INLINE void MB_FUNC_NAME(extract_point_affine_)(P521_POINT_AFFINE* r, const SINGLE_P521_POINT_AFFINE* tbl, U64 idx) { /* decrement index (the table does not contain [0]*P */ U64 targIdx = sub64(idx, set1(1)); diff --git a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c index 46a0bc72..c630d4cb 100644 --- a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c +++ b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_ed25519.c @@ -55,7 +55,7 @@ __ALIGN64 static const int64u ed25519_2_pm1_4[FE_LEN52][sizeof(U64) / sizeof(int }; /* ext => cached */ -__INLINE void ge_ext_to_cached_mb(ge52_cached_mb *r, const ge52_ext_mb* p) +__MBX_INLINE void ge_ext_to_cached_mb(ge52_cached_mb *r, const ge52_ext_mb* p) { fe52_add(r->YaddX, p->Y, p->X); fe52_sub(r->YsubX, p->Y, p->X); @@ -272,15 +272,15 @@ static void 
extract_precomputed_basepoint_dual(ge52_precomp_mb* p0, */ /* if msb set */ -__INLINE int32u isMsb_ct(int32u a) +__MBX_INLINE int32u isMsb_ct(int32u a) { return (int32u)0 - (a >> (sizeof(a) * 8 - 1)); } /* tests if a==0 */ -__INLINE int32u isZero(int32u a) +__MBX_INLINE int32u isZero(int32u a) { return isMsb_ct(~a & (a - 1)); } /* tests if a==b */ -__INLINE int32u isEqu(int32u a, int32u b) +__MBX_INLINE int32u isEqu(int32u a, int32u b) { return isZero(a ^ b); } void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) @@ -297,7 +297,7 @@ void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) __ALIGN64 ge52_p1p1_mb t; __ALIGN64 ge52_homo_mb s; - /* initial values are nuetral */ + /* initial values are neutral */ neutral_ge52_ext_mb(&r0); neutral_ge52_ext_mb(&r1); @@ -377,7 +377,7 @@ void ifma_ed25519_mul_basepoint(ge52_ext_mb* r, const U64 scalar[]) *sign = s & 1; *digit = (Ipp8u)d; */ -__INLINE void booth_recode(__mb_mask* sign, U64* dvalue, U64 wvalue) +__MBX_INLINE void booth_recode(__mb_mask* sign, U64* dvalue, U64 wvalue) { U64 one = set1(1); U64 zero = get_zero64(); diff --git a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c index 0c5b8fc4..f97183e3 100644 --- a/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c +++ b/sources/ippcp/crypto_mb/src/ed25519/ifma_arith_n25519.c @@ -105,7 +105,7 @@ void ifma52_sub_with_borrow(U64 r[], const U64 x[], const U64 y[]) #endif } -// r = x -__INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) +__MBX_INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) { __m512i r0 = _mm512_loadu_si512(inp[0]); __m512i r1 = _mm512_loadu_si512(inp[1]); @@ -36,11 +36,11 @@ __INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) __m512i r15 = _mm512_loadu_si512(inp[15]); // tansposition - __m512i t0 = _mm512_unpacklo_epi32(r0, r1); // 0 16 1 17 4 20 5 21 8 24 9 25 12 28 13 
29 + __m512i t0 = _mm512_unpacklo_epi32(r0, r1); // 0 16 1 17 4 20 5 21 8 24 9 25 12 28 13 29 __m512i t1 = _mm512_unpackhi_epi32(r0, r1); // 2 18 3 19 6 22 7 23 10 26 11 27 14 30 15 31 __m512i t2 = _mm512_unpacklo_epi32(r2, r3); // 32 48 33 49 ... __m512i t3 = _mm512_unpackhi_epi32(r2, r3); // 34 50 35 51 ... - __m512i t4 = _mm512_unpacklo_epi32(r4, r5); // 64 80 65 81 ... + __m512i t4 = _mm512_unpacklo_epi32(r4, r5); // 64 80 65 81 ... __m512i t5 = _mm512_unpackhi_epi32(r4, r5); // 66 82 67 83 ... __m512i t6 = _mm512_unpacklo_epi32(r6, r7); // 96 112 97 113 ... __m512i t7 = _mm512_unpackhi_epi32(r6, r7); // 98 114 99 115 ... @@ -57,15 +57,15 @@ __INLINE void TRANSPOSE_16X16_I32(int32u out[][16], const int32u* const inp[16]) r1 = _mm512_unpackhi_epi64(t0, t2); // 1 17 33 49 ... r2 = _mm512_unpacklo_epi64(t1, t3); // 2 18 34 49 ... r3 = _mm512_unpackhi_epi64(t1, t3); // 3 19 35 51 ... - r4 = _mm512_unpacklo_epi64(t4, t6); // 64 80 96 112 ... + r4 = _mm512_unpacklo_epi64(t4, t6); // 64 80 96 112 ... r5 = _mm512_unpackhi_epi64(t4, t6); // 65 81 97 114 ... r6 = _mm512_unpacklo_epi64(t5, t7); // 66 82 98 113 ... r7 = _mm512_unpackhi_epi64(t5, t7); // 67 83 99 115 ... - r8 = _mm512_unpacklo_epi64(t8, t10); // 128 144 160 176 ... + r8 = _mm512_unpacklo_epi64(t8, t10); // 128 144 160 176 ... r9 = _mm512_unpackhi_epi64(t8, t10); // 129 145 161 178 ... - r10 = _mm512_unpacklo_epi64(t9, t11); // 130 146 162 177 ... + r10 = _mm512_unpacklo_epi64(t9, t11); // 130 146 162 177 ... r11 = _mm512_unpackhi_epi64(t9, t11); // 131 147 163 179 ... - r12 = _mm512_unpacklo_epi64(t12, t14); // 192 208 228 240 ... + r12 = _mm512_unpacklo_epi64(t12, t14); // 192 208 228 240 ... r13 = _mm512_unpackhi_epi64(t12, t14); // 193 209 229 241 ... r14 = _mm512_unpacklo_epi64(t13, t15); // 194 210 230 242 ... r15 = _mm512_unpackhi_epi64(t13, t15); // 195 211 231 243 ... 
@@ -318,7 +318,7 @@ void sm3_avx512_mb16(int32u hash_pa[][16], const int8u* const msg_pa[16], int le _mm512_storeu_si512(hash_pa + 5, F); _mm512_storeu_si512(hash_pa + 6, G); _mm512_storeu_si512(hash_pa + 7, H); - + /* Update pointers to data, local lengths and mask */ _mm512_storeu_si512(loc_data, _mm512_mask_add_epi64(_mm512_set1_epi64((long long)&zero_buffer), (__mmask8)mb_mask, _mm512_loadu_si512(loc_data), _mm512_set1_epi64(SM3_MSG_BLOCK_SIZE))); _mm512_storeu_si512(loc_data + 8, _mm512_mask_add_epi64(_mm512_set1_epi64((long long)&zero_buffer), *((__mmask8*)&mb_mask + 1), _mm512_loadu_si512(loc_data+8), _mm512_set1_epi64(SM3_MSG_BLOCK_SIZE))); diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c index f786b9a4..84761678 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_gctr_kernel_mb16.c @@ -23,7 +23,7 @@ // Implementation is the same with SM4-CTR */ -__INLINE __m128i IncBlock128(__m128i x, int32u increment) { return _mm_add_epi32(x, _mm_maskz_loadu_epi32(1, &increment)); } +__MBX_INLINE __m128i IncBlock128(__m128i x, int32u increment) { return _mm_add_epi32(x, _mm_maskz_loadu_epi32(1, &increment)); } static void sm4_gctr_mask_kernel_mb16(__m512i *CTR, const __m512i *p_rk, diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c index a5f3cfc2..e35f6cc3 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_full_blocks_mb16.c @@ -17,7 +17,7 @@ #include #include -__INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __mmask16 load_mask) +__MBX_INLINE void read_first(__m512i *data_blocks[4], 
const int8u *const pa_input[SM4_LINES], __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; @@ -114,7 +114,7 @@ __INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM /* End of explicitly unrolled loop */ } -__INLINE void read_next(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], int block_number, __mmask16 load_mask) +__MBX_INLINE void read_next(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], int block_number, __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; diff --git a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c index 268acd32..c225be1c 100644 --- a/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/gcm/internal/sm4_gcm_update_ghash_partial_blocks_mb16.c @@ -17,7 +17,7 @@ #include #include -__INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __m512i *input_len, __mmask16 load_mask) +__MBX_INLINE void read_first(__m512i *data_blocks[4], const int8u *const pa_input[SM4_LINES], __m512i *input_len, __mmask16 load_mask) { __mmask16 load_mask_0 = load_mask >> 0 * 4; __mmask16 load_mask_1 = load_mask >> 1 * 4; diff --git a/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c b/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c index 80ceeaa7..974fcd7f 100644 --- a/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c +++ b/sources/ippcp/crypto_mb/src/sm4/sm4_ctr_mb16.c @@ -142,7 +142,7 @@ static void sm4_ctr128_mask_kernel_mb16(__m512i* CTR, const __m512i* p_rk, __m51 } -__INLINE __m128i IncBlock128(__m128i x, int32u increment) +__MBX_INLINE __m128i IncBlock128(__m128i x, int32u increment) { __m128i t = _mm_add_epi64(x, _mm_maskz_loadu_epi32(1, &increment)); __mmask8 
carryMask = _mm_cmplt_epu64_mask(t, x); diff --git a/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c b/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c index d11d9dc5..7364e7c4 100644 --- a/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c +++ b/sources/ippcp/crypto_mb/src/x25519/ifma_x25519.c @@ -54,7 +54,7 @@ __ALIGN64 static const int64u MOD_2_260_[8] = {19*32, 19*32, 19*32, 19*32, //////////////////////////////////////////////////////////// -__INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { +__MBX_INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; U64 *va = (U64*) a; @@ -83,7 +83,7 @@ __INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -116,18 +116,18 @@ __INLINE void ed25519_mul(U64 out[], const U64 a[], const U64 b[]) { } /* SQR -c=0 (0,0) -c=1 (0,1) -c=2 (0,2) (1,1) -c=3 (0,3) (1,2) -c=4 (0,4) (1,3) (2,2) -c=5 (1,4) (2,3) -c=6 (2,4) (3,3) -c=7 (3,4) +c=0 (0,0) +c=1 (0,1) +c=2 (0,2) (1,1) +c=3 (0,3) (1,2) +c=4 (0,4) (1,3) (2,2) +c=5 (1,4) (2,3) +c=6 (2,4) (3,3) +c=7 (3,4) c=8 (4,4) */ -__INLINE void ed25519_sqr(U64 out[], const U64 a[]) { +__MBX_INLINE void ed25519_sqr(U64 out[], const U64 a[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; U64 *va = (U64*) a; @@ -299,7 +299,7 @@ static const int64u VMASK52[8] = {MASK52, MASK52, MASK52, MASK52, R##0 = fma52lo(R##0, srli64(R##4, 47), MOD_2_255); \ R##4 = and64(R##4, loadu64(VMASK_R4)); -__INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], const U64 a0[], const U64 b0[], const U64 a1[], const U64 b1[]) { @@ -406,7 +406,7 @@ __INLINE void ed25519_mul_dual(U64 out0[], U64 out1[], storeu64(&vr1[4], r14); } -__INLINE void ed25519_sqr_dual(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_sqr_dual(U64 out0[], U64 
out1[], const U64 a0[], const U64 a1[]) { U64 r00, r01, r02, r03, r04, r05, r06, r07, r08, r09; @@ -514,7 +514,7 @@ __INLINE void ed25519_sqr_dual(U64 out0[], U64 out1[], ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// -__INLINE void fe52mb8_set(U64 out[], int64u value) +__MBX_INLINE void fe52mb8_set(U64 out[], int64u value) { storeu64(&out[0], set64((long long)value)); storeu64(&out[1], get_zero64()); @@ -522,7 +522,7 @@ __INLINE void fe52mb8_set(U64 out[], int64u value) storeu64(&out[3], get_zero64()); storeu64(&out[4], get_zero64()); } -__INLINE void fe52mb8_copy(U64 out[], const U64 in[]) +__MBX_INLINE void fe52mb8_copy(U64 out[], const U64 in[]) { storeu64(&out[0], loadu64(&in[0])); storeu64(&out[1], loadu64(&in[1])); @@ -533,7 +533,7 @@ __INLINE void fe52mb8_copy(U64 out[], const U64 in[]) // Clang warning -Wunused-function #if(0) -__INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = get_zero64(); @@ -558,7 +558,7 @@ __INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -594,13 +594,13 @@ __INLINE void fe52mb8_mul_mod25519(U64 vr[], const U64 va[], const U64 vb[]) storeu64(&vr[4], r4); } -__INLINE void fe52mb8_sqr_mod25519(U64 out[], const U64 a[]) +__MBX_INLINE void fe52mb8_sqr_mod25519(U64 out[], const U64 a[]) { fe52mb8_mul_mod25519(out, a, a); } #endif -__INLINE void fe52mb8_mul121666_mod25519(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_mul121666_mod25519(U64 vr[], const U64 va[]) { U64 multiplier = set64(121666); @@ -647,20 +647,20 @@ __INLINE void fe52mb8_mul121666_mod25519(U64 vr[], 
const U64 va[]) // __ALIGN64 static const int64u prime25519[5] = { // PRIME25519_LO, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_HI}; -__ALIGN64 static const int64u VPRIME25519_LO[8] = - { PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, +__ALIGN64 static const int64u VPRIME25519_LO[8] = + { PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO, PRIME25519_LO }; -__ALIGN64 static const int64u VPRIME25519_MID[8] = - { PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, +__ALIGN64 static const int64u VPRIME25519_MID[8] = + { PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID, PRIME25519_MID }; -__ALIGN64 static const int64u VPRIME25519_HI[8] = - { PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, +__ALIGN64 static const int64u VPRIME25519_HI[8] = + { PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI, PRIME25519_HI }; -__INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) +__MBX_INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) { return mask_mov64 (a, kmask, b); } #define NORM_ASHIFTR(R, I, J) \ @@ -671,7 +671,7 @@ __INLINE U64 cmov_U64(U64 a, U64 b, __mb_mask kmask) R##J = add64(R##J, srli64(R##I, DIGIT_SIZE)); \ R##I = and64(R##I, loadu64(VMASK52)); -__INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { /* r = a+b */ U64 r0 = add64(va[0], vb[0]); @@ -709,7 +709,7 @@ __INLINE void fe52mb8_add_mod25519(U64 vr[], const U64 va[], const U64 vb[]) storeu64(&vr[4], cmov_U64(t4, r4, cmask)); } -__INLINE void fe52mb8_sub_mod25519(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_sub_mod25519(U64 vr[], const U64 va[], const U64 vb[]) { /* r = a-b */ U64 r0 = sub64(va[0], vb[0]); @@ -747,7 +747,7 @@ __INLINE void fe52mb8_sub_mod25519(U64 vr[], 
const U64 va[], const U64 vb[]) storeu64(&vr[4], cmov_U64(r4, t4, cmask)); } -__INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) { /* r = a-p */ U64 r0 = sub64(va[0], loadu64(VPRIME25519_LO)); @@ -788,7 +788,7 @@ __INLINE void fe52mb8_red_p25519(U64 vr[], const U64 va[]) considering the exponent as 2^255 - 21 = (2^5) * (2^250 - 1) + 11. */ -__INLINE void fe52mb8_inv_mod25519(U64 out[], const U64 z[]) +__MBX_INLINE void fe52mb8_inv_mod25519(U64 out[], const U64 z[]) { __ALIGN64 U64 t0[5]; __ALIGN64 U64 t1[5]; @@ -906,7 +906,7 @@ static void x25519_scalar_mul(U64 out[], U64 scalar[], U64 point[]) swap = b; fe52_sub(tmp0, x3, z3); fe52_sub(tmp1, x2, z2); - fe52_add(x2, x2, z2); + fe52_add(x2, x2, z2); fe52_add(z2, x3, z3); #ifdef USE_DUAL_MUL_SQR @@ -951,7 +951,7 @@ static void x25519_scalar_mul(U64 out[], U64 scalar[], U64 point[]) ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -__INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], +__MBX_INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], const U64 a0[], const U64 b0[], const U64 a1[], const U64 b1[]) { @@ -1047,7 +1047,7 @@ __INLINE void ed25519_mul_dual_wonorm(U64 out0[], U64 out1[], storeu64(&vr1[4], r14); } -__INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb[]) +__MBX_INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb[]) { U64 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = get_zero64(); @@ -1072,7 +1072,7 @@ __INLINE void fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb ROUND_MUL(2, 4, r6, r7) ROUND_MUL(3, 3, r6, r7) ROUND_MUL(4, 2, r6, r7) - + ROUND_MUL(0, 0, r0, r1) ROUND_MUL(0, 1, r1, r2) ROUND_MUL(0, 2, r2, r3) @@ -1102,7 +1102,7 @@ __INLINE void 
fe52mb8_mul_mod25519_wonorm(U64 vr[], const U64 va[], const U64 vb storeu64(&vr[4], r4); } -__INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) +__MBX_INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) { U64 multiplier = set64(121666); @@ -1136,7 +1136,7 @@ __INLINE void fe52mb8_mul121666_mod25519_wonorm(U64 vr[], const U64 va[]) storeu64(&vr[4], r4); } -__INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) +__MBX_INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) { __ALIGN64 U64 x1[5], x2[5], x3[5]; __ALIGN64 U64 z2[5], z3[5]; @@ -1180,7 +1180,7 @@ __INLINE void x25519_scalar_mul_dual(U64 out[], U64 scalar[], U64 point[]) fe52_sub(tmp0, x3, z3); fe52_sub(tmp1, x2, z2); - fe52_add(x2, x2, z2); + fe52_add(x2, x2, z2); fe52_add(z2, x3, z3); ed25519_mul_dual_wonorm(z3, z2, x2,tmp0, z2,tmp1); @@ -1575,19 +1575,19 @@ __ALIGN64 static int64u muTBL52[255][NUMBER_OF_DIGITS(256,DIGIT_SIZE)] = { {0x000deda7f334d2df, 0x00051af2a57b4a6a, 0x0006dceaa87bde9c, 0x000d07ba98fc64f8, 0x00006bbe0335c20e}, }; -__ALIGN64 static const int64u U2_0[8] = +__ALIGN64 static const int64u U2_0[8] = {0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290, 0x000b1e0137d48290}; -__ALIGN64 static const int64u U2_1[8] = +__ALIGN64 static const int64u U2_1[8] = {0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816, 0x00051eb4d1207816}; -__ALIGN64 static const int64u U2_2[8] = +__ALIGN64 static const int64u U2_2[8] = {0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a, 0x000ca2b71d440f6a}; -__ALIGN64 static const int64u U2_3[8] = +__ALIGN64 static const int64u U2_3[8] = {0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 
0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d, 0x00054cb52385f46d}; -__ALIGN64 static const int64u U2_4[8] = +__ALIGN64 static const int64u U2_4[8] = {0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83, 0x0000215132111d83}; diff --git a/sources/ippcp/ecnist/ifma_arith_n256.c b/sources/ippcp/ecnist/ifma_arith_n256.c index b08628b0..b17fa5a4 100644 --- a/sources/ippcp/ecnist/ifma_arith_n256.c +++ b/sources/ippcp/ecnist/ifma_arith_n256.c @@ -182,7 +182,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_n256, (const m512 a)) * note: z in in Montgomery domain * r in Montgomery domain */ -__INLINE m512 ifma_ams52_n256_ntimes(const m512 a, int n) +__IPPCP_INLINE m512 ifma_ams52_n256_ntimes(const m512 a, int n) { m512 r = a; for (; n > 0; --n) { diff --git a/sources/ippcp/ecnist/ifma_arith_n384.c b/sources/ippcp/ecnist/ifma_arith_n384.c index 9ecb3aaf..3e987e73 100644 --- a/sources/ippcp/ecnist/ifma_arith_n384.c +++ b/sources/ippcp/ecnist/ifma_arith_n384.c @@ -196,7 +196,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_n384, (const m512 a)) * note: z in in Montgomery domain * r in Montgomery domain */ -__INLINE m512 ifma_ams52_n384_ntimes(const m512 a, int n) +__IPPCP_INLINE m512 ifma_ams52_n384_ntimes(const m512 a, int n) { m512 r = a; for (; n > 0; --n) { diff --git a/sources/ippcp/ecnist/ifma_arith_n521.c b/sources/ippcp/ecnist/ifma_arith_n521.c index 37a5fa81..b16a322b 100644 --- a/sources/ippcp/ecnist/ifma_arith_n521.c +++ b/sources/ippcp/ecnist/ifma_arith_n521.c @@ -238,7 +238,7 @@ static void ifma_ams52_n521(fe521 pr[], const fe521 a) #define sqr(R, A) ifma_ams52_n521(&(R), (A)) /* r = base^(2^n) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) { fe521 r; FE521_COPY(r, a); diff --git a/sources/ippcp/ecnist/ifma_arith_p256.c 
b/sources/ippcp/ecnist/ifma_arith_p256.c index f3e4909c..b0b74fcd 100644 --- a/sources/ippcp/ecnist/ifma_arith_p256.c +++ b/sources/ippcp/ecnist/ifma_arith_p256.c @@ -171,14 +171,14 @@ IPP_OWN_DEFN(void, ifma_amm52_dual_p256, (m512 * r1, const m512 a1, const m512 b } /* R = (A*B) with norm */ -__INLINE m512 ifma_amm52_p256_norm(const m512 a, const m512 b) +__IPPCP_INLINE m512 ifma_amm52_p256_norm(const m512 a, const m512 b) { m512 r = ifma_amm52_p256(a, b); return ifma_lnorm52(r); } /* R = (A*A) with norm */ -__INLINE m512 ifma_ams52_p256_norm(const m512 a) +__IPPCP_INLINE m512 ifma_ams52_p256_norm(const m512 a) { return ifma_amm52_p256_norm(a, a); } @@ -263,7 +263,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_p256, (const m512 a)) #define sqr(R, A) (R) = ifma_ams52_p256_norm((A)) #define mul(R, A, B) (R) = ifma_amm52_p256_norm((A), (B)); -__INLINE m512 ifma_ams52_p256_ntimes(m512 a, Ipp32s n) +__IPPCP_INLINE m512 ifma_ams52_p256_ntimes(m512 a, Ipp32s n) { for (; n > 0; --n) sqr(a, a); diff --git a/sources/ippcp/ecnist/ifma_arith_p256.h b/sources/ippcp/ecnist/ifma_arith_p256.h index dedcdf16..3a9f47c7 100644 --- a/sources/ippcp/ecnist/ifma_arith_p256.h +++ b/sources/ippcp/ecnist/ifma_arith_p256.h @@ -63,7 +63,7 @@ IPP_OWN_DECL(void, ifma_amm52_dual_p256, (m512 *r1, const m512 a1, const m512 b1 * * \param[in] a value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) +__IPPCP_INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) { return ifma_amm52_p256(a, a); } @@ -78,7 +78,7 @@ __INLINE IPP_OWN_DEFN(m512, ifma_ams52_p256, (const m512 a)) * \param[out] r2 * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p256, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p256, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) { ifma_amm52_dual_p256(r1, a1, a1, r2, a2, a2); return; diff --git a/sources/ippcp/ecnist/ifma_arith_p384.c 
b/sources/ippcp/ecnist/ifma_arith_p384.c index c9fbec94..764259af 100644 --- a/sources/ippcp/ecnist/ifma_arith_p384.c +++ b/sources/ippcp/ecnist/ifma_arith_p384.c @@ -242,7 +242,7 @@ IPP_OWN_DEFN(void, ifma_amm52_dual_p384, (m512 * pr1, const m512 a1, const m512 } /* R = (A*B) with norm */ -__INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) +__IPPCP_INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) { m512 r = ifma_amm52_p384(a, b); /* normalization */ @@ -250,7 +250,7 @@ __INLINE m512 ifma_amm52_p384_norm(const m512 a, const m512 b) } /* R = (A*A) with norm */ -__INLINE m512 m512_sqr_norm(const m512 a) +__IPPCP_INLINE m512 m512_sqr_norm(const m512 a) { return ifma_amm52_p384_norm(a, a); } @@ -297,7 +297,7 @@ IPP_OWN_DEFN(m512, ifma_frommont52_p384, (const m512 a)) ifma_amm52_dual_p384(&(R1), (A1), (B1), &(R2), (A2), (B2)); \ ifma_lnorm52_dual(&(R1), (R1), &(R2), (R2)) -__INLINE m512 ifma_ams52_p384_ntimes(const m512 a, Ipp32s n) +__IPPCP_INLINE m512 ifma_ams52_p384_ntimes(const m512 a, Ipp32s n) { m512 r = a; for (; n > 0; --n) diff --git a/sources/ippcp/ecnist/ifma_arith_p384.h b/sources/ippcp/ecnist/ifma_arith_p384.h index 487665e5..6363dd7d 100644 --- a/sources/ippcp/ecnist/ifma_arith_p384.h +++ b/sources/ippcp/ecnist/ifma_arith_p384.h @@ -63,7 +63,7 @@ IPP_OWN_DECL(void, ifma_amm52_dual_p384, (m512 * r1, const m512 a1, const m512 b * * \param[in] a value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) +__IPPCP_INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) { return ifma_amm52_p384(a, a); } @@ -78,7 +78,7 @@ __INLINE IPP_OWN_DEFN(m512, ifma_ams52_p384, (const m512 a)) * \param[out] r2 * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p384, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_dual_p384, (m512 * r1, const m512 a1, m512 *r2, const m512 a2)) { ifma_amm52_dual_p384(r1, a1, a1, r2, a2, a2); return; diff 
--git a/sources/ippcp/ecnist/ifma_arith_p521.c b/sources/ippcp/ecnist/ifma_arith_p521.c index e284ff25..a2de5ecb 100644 --- a/sources/ippcp/ecnist/ifma_arith_p521.c +++ b/sources/ippcp/ecnist/ifma_arith_p521.c @@ -940,14 +940,14 @@ IPP_OWN_DEFN(void, ifma_frommont52_p521, (fe521 pr[], const fe521 a)) return; } -__INLINE IPP_OWN_DEFN(void, ifma_amm52_p521_norm, (fe521 pr[], const fe521 a, const fe521 b)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_amm52_p521_norm, (fe521 pr[], const fe521 a, const fe521 b)) { ifma_amm52_p521(pr, a, b); ifma_lnorm52_p521(pr, *pr); return; } -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) { ifma_ams52_p521(pr, a); ifma_lnorm52_p521(pr, *pr); @@ -961,7 +961,7 @@ __INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_norm, (fe521 pr[], const fe521 a)) ifma_lnorm52_dual_p521(&(R1), (R1), &(R2), (R2)) /* r = base^(2^n) */ -__INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) +__IPPCP_INLINE IPP_OWN_DEFN(void, ifma_ams52_p521_ntimes, (fe521 pr[], const fe521 a, int n)) { fe521 r; FE521_COPY(r, a); diff --git a/sources/ippcp/ecnist/ifma_defs.h b/sources/ippcp/ecnist/ifma_defs.h index ad36a599..73ec9c99 100644 --- a/sources/ippcp/ecnist/ifma_defs.h +++ b/sources/ippcp/ecnist/ifma_defs.h @@ -45,7 +45,7 @@ * \return 0xFF - if MSB = 1 * \return 0x00 - if MSB = 0 */ -__INLINE mask8 check_bit(const mask8 a, int bit) +__IPPCP_INLINE mask8 check_bit(const mask8 a, int bit) { return (mask8)((mask8)0 - ((a >> bit) & 1u)); } @@ -59,7 +59,7 @@ __INLINE mask8 check_bit(const mask8 a, int bit) * \return 0xFF - if input value is all zeroes * \return 0x00 - if input value is not all zeroes */ -__INLINE mask8 is_zero_i64(const m512 a) +__IPPCP_INLINE mask8 is_zero_i64(const m512 a) { const mask8 mask = cmp_i64_mask(a, setzero_i64(), _MM_CMPINT_NE); return check_bit((~mask & (mask - 1u)), 7); diff --git 
a/sources/ippcp/ecnist/ifma_defs_p521.h b/sources/ippcp/ecnist/ifma_defs_p521.h index fb5a843e..ad060e2d 100644 --- a/sources/ippcp/ecnist/ifma_defs_p521.h +++ b/sources/ippcp/ecnist/ifma_defs_p521.h @@ -61,12 +61,12 @@ static const __ALIGN64 Ipp64u P521R1_ONE52[P521R1_NUM_CHUNK][P521R1_LENFE521_52] FE521_MID(R) = m256_loadu_i64(FE521_MID(A)); \ FE521_HI(R) = m256_loadu_i64(FE521_HI(A)) -__INLINE mask8 is_msb_m256(const mask8 a) +__IPPCP_INLINE mask8 is_msb_m256(const mask8 a) { return ((mask8)0 - (a >> 7)); } -__INLINE mask8 is_zero_m256(const m256i a) +__IPPCP_INLINE mask8 is_zero_m256(const m256i a) { const mask8 mask = _mm256_cmp_epi64_mask(a, m256_setzero_i64(), _MM_CMPINT_NE); return is_msb_m256((~mask & (mask - 1))); diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p256.c b/sources/ippcp/ecnist/ifma_ecpoint_p256.c index 6d836413..5bd98811 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p256.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p256.c @@ -510,7 +510,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -518,7 +518,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P256_POINT_IFMA *r, const Ipp32s digit, const P256_POINT_IFMA *tbl) +__IPPCP_INLINE void extract_table_point(P256_POINT_IFMA *r, const Ipp32s digit, const P256_POINT_IFMA *tbl) { Ipp32s idx = digit - 1; @@ -659,7 +659,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp256_mul_point, (P256_POINT_IFMA * r, const P256_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P256_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void extract_point_affine(P256_POINT_AFFINE_IFMA *r, const P256_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git 
a/sources/ippcp/ecnist/ifma_ecpoint_p256.h b/sources/ippcp/ecnist/ifma_ecpoint_p256.h index c6acb52b..269bda6a 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p256.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p256.h @@ -138,7 +138,7 @@ IPP_OWN_DECL(void, p256r1_select_ap_w7_ifma, (BNU_CHUNK_T * pAffinePoint, const #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, @@ -166,7 +166,7 @@ __INLINE void recode_point_to_mont52(P256_POINT_IFMA *pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, P256_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p384.c b/sources/ippcp/ecnist/ifma_ecpoint_p384.c index 449adcd3..ed368f62 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p384.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p384.c @@ -546,7 +546,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -554,7 +554,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P384_POINT_IFMA *r, const Ipp32s digit, const P384_POINT_IFMA *tbl) +__IPPCP_INLINE void extract_table_point(P384_POINT_IFMA *r, const Ipp32s digit, const P384_POINT_IFMA *tbl) { Ipp32s idx = digit - 1; @@ -691,7 +691,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp384_mul_point, (P384_POINT_IFMA * r, const P384_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P384_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void 
extract_point_affine(P384_POINT_AFFINE_IFMA *r, const P384_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p384.h b/sources/ippcp/ecnist/ifma_ecpoint_p384.h index e682808b..a33a8bd6 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p384.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p384.h @@ -138,7 +138,7 @@ IPP_OWN_DECL(void, p384r1_select_ap_w4_ifma, (BNU_CHUNK_T * pAffinePoint, const #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, @@ -166,7 +166,7 @@ __INLINE void recode_point_to_mont52(P384_POINT_IFMA *pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(const IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(const IppsGFpECPoint *pR, P384_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod *method, diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p521.c b/sources/ippcp/ecnist/ifma_ecpoint_p521.c index 9aac2e10..51a7542e 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p521.c +++ b/sources/ippcp/ecnist/ifma_ecpoint_p521.c @@ -519,7 +519,7 @@ static __NOINLINE void clear_secret_context(Ipp16u *wval, #define WIN_SIZE (5) -__INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); @@ -527,7 +527,7 @@ __INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) return (mask8)(0 - msb); } -__INLINE void extract_table_point(P521_POINT_IFMA *r, const Ipp32s digit, const P521_POINT_IFMA tbl[]) +__IPPCP_INLINE void extract_table_point(P521_POINT_IFMA *r, const Ipp32s digit, const P521_POINT_IFMA tbl[]) { Ipp32s idx = digit - 1; @@ -664,7 +664,7 @@ IPP_OWN_DEFN(void, ifma_ec_nistp521_mul_point, (P521_POINT_IFMA * r, const P521_ #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY 
BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(P521_POINT_AFFINE_IFMA *r, +__IPPCP_INLINE void extract_point_affine(P521_POINT_AFFINE_IFMA *r, const P521_POINT_AFFINE_IFMA_MEM *tbl, const Ipp32s digit) { diff --git a/sources/ippcp/ecnist/ifma_ecpoint_p521.h b/sources/ippcp/ecnist/ifma_ecpoint_p521.h index 4930e868..f4575028 100644 --- a/sources/ippcp/ecnist/ifma_ecpoint_p521.h +++ b/sources/ippcp/ecnist/ifma_ecpoint_p521.h @@ -100,7 +100,7 @@ IPP_OWN_DECL(void, ifma_ec_nistp521_add_point_affine, (P521_POINT_IFMA * r, cons #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, +__IPPCP_INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, const BNU_CHUNK_T *pP, BNU_CHUNK_T *pPool, ifmaArithMethod_p521 *method, @@ -128,7 +128,7 @@ __INLINE void recode_point_to_mont52(P521_POINT_IFMA *pR, p_to_mont(&(pR->z), pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint *pR, P521_POINT_IFMA *pP, BNU_CHUNK_T *pPool, ifmaArithMethod_p521 *method, diff --git a/sources/ippcp/exports.linux.lib-export b/sources/ippcp/exports.linux.lib-export index 98a344d5..6c1af308 100644 --- a/sources/ippcp/exports.linux.lib-export +++ b/sources/ippcp/exports.linux.lib-export @@ -562,6 +562,12 @@ EXTERN (ippsXMSSSetSignatureState) EXTERN (ippsXMSSSignatureStateGetSize) EXTERN (ippsXMSSPublicKeyStateGetSize) EXTERN (ippsXMSSBufferGetSize) +EXTERN (ippsLMSBufferGetSize) +EXTERN (ippsLMSSignatureStateGetSize) +EXTERN (ippsLMSPublicKeyStateGetSize) +EXTERN (ippsLMSSetPublicKeyState) +EXTERN (ippsLMSSetSignatureState) +EXTERN (ippsLMSVerify) VERSION { { @@ -1130,6 +1136,12 @@ VERSION { ippsXMSSSignatureStateGetSize; ippsXMSSPublicKeyStateGetSize; ippsXMSSBufferGetSize; + ippsLMSBufferGetSize; + ippsLMSSignatureStateGetSize; + ippsLMSPublicKeyStateGetSize; + ippsLMSSetPublicKeyState; + ippsLMSSetSignatureState; + ippsLMSVerify; local: *; }; } diff --git 
a/sources/ippcp/exports.linux.selftests-export b/sources/ippcp/exports.linux.selftests-export index 8e8b45ca..8311fc47 100644 --- a/sources/ippcp/exports.linux.selftests-export +++ b/sources/ippcp/exports.linux.selftests-export @@ -562,6 +562,12 @@ EXTERN (ippsXMSSSetSignatureState) EXTERN (ippsXMSSSignatureStateGetSize) EXTERN (ippsXMSSPublicKeyStateGetSize) EXTERN (ippsXMSSBufferGetSize) +EXTERN (ippsLMSBufferGetSize) +EXTERN (ippsLMSSignatureStateGetSize) +EXTERN (ippsLMSPublicKeyStateGetSize) +EXTERN (ippsLMSSetPublicKeyState) +EXTERN (ippsLMSSetSignatureState) +EXTERN (ippsLMSVerify) EXTERN (ippcp_is_fips_approved_func) EXTERN (fips_selftest_ippsAESEncryptDecrypt_get_size) @@ -1183,6 +1189,12 @@ VERSION { ippsXMSSSignatureStateGetSize; ippsXMSSPublicKeyStateGetSize; ippsXMSSBufferGetSize; + ippsLMSBufferGetSize; + ippsLMSSignatureStateGetSize; + ippsLMSPublicKeyStateGetSize; + ippsLMSSetPublicKeyState; + ippsLMSSetSignatureState; + ippsLMSVerify; ippcp_is_fips_approved_func; fips_selftest_ippsAESEncryptDecrypt_get_size; diff --git a/sources/ippcp/exports.macosx.lib-export b/sources/ippcp/exports.macosx.lib-export index 931f516f..ddc7edd1 100644 --- a/sources/ippcp/exports.macosx.lib-export +++ b/sources/ippcp/exports.macosx.lib-export @@ -562,3 +562,9 @@ _ippsXMSSSetSignatureState _ippsXMSSSignatureStateGetSize _ippsXMSSPublicKeyStateGetSize _ippsXMSSBufferGetSize +_ippsLMSBufferGetSize +_ippsLMSSignatureStateGetSize +_ippsLMSPublicKeyStateGetSize +_ippsLMSSetPublicKeyState +_ippsLMSSetSignatureState +_ippsLMSVerify diff --git a/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c b/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c index 0c676836..593a630f 100644 --- a/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c +++ b/sources/ippcp/fips_cert/selftest_ecdsa_sign_verify.c @@ -50,12 +50,6 @@ static const Ipp8u r[] = { 0xac,0xc2,0xc8,0x79,0x6f,0x5e,0xbb,0xca,0x7a static const Ipp8u s[] = { 
0x03,0x89,0x05,0xcc,0x2a,0xda,0xcd,0x3c,0x5a,0x17,0x6f,0xe9,0x18,0xb2,0x97,0xef, 0x1c,0x37,0xf7,0x2b,0x26,0x76,0x6c,0x78,0xb2,0xa6,0x05,0xca,0x19,0x78,0xf7,0x8b }; -/* pub key coordinates */ -static const Ipp8u qx[] = { 0x83,0xbf,0x71,0xc2,0x46,0xff,0x59,0x3c,0x2f,0xb1,0xbf,0x4b,0xe9,0x5d,0x56,0xd3, - 0xcc,0x8f,0xdb,0x48,0xa2,0xbf,0x33,0xf0,0xf4,0xc7,0x5f,0x07,0x1c,0xe9,0xcb,0x1c}; -static const Ipp8u qy[] = { 0xa9,0x4c,0x9a,0xa8,0x5c,0xcd,0x7c,0xdc,0x78,0x4e,0x40,0xb7,0x93,0xca,0xb7,0x6d, - 0xe0,0x13,0x61,0x0e,0x2c,0xdb,0x1f,0x1a,0xa2,0xf9,0x11,0x88,0xc6,0x14,0x40,0xce }; - static const unsigned int primeBitSize = 256; static const unsigned int ordWordSize = 8; @@ -169,12 +163,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECSignDSA, (Ipp8u *pGFpBuff, Ipp8u int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else @@ -325,12 +325,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECVerifyDSA, (Ipp8u *pGFpBuff, Ipp int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, 
pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else @@ -472,12 +478,18 @@ IPPFUN(fips_test_status, fips_selftest_ippsGFpECPrivateKey, (Ipp8u *pGFpBuff, Ip int gfpECBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_GFpEC_buff(&gfpECBuffSize, pGFpBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE(pGFpBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pGFpECBuff = malloc((size_t)gfpECBuffSize); int dataBuffSize = 0; sts = fips_selftest_ippsGFpECSignVerifyDSA_get_size_data_buff(&dataBuffSize, pGFpBuff, pGFpECBuff); - if (sts != ippStsNoErr) { return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; } + if (sts != ippStsNoErr) { + MEMORY_FREE_2(pGFpBuff, pGFpECBuff) + return IPPCP_ALGO_SELFTEST_BAD_ARGS_ERR; + } pDataBuff = malloc((size_t)dataBuffSize); } #else diff --git a/sources/ippcp/gsmod_montinv.c b/sources/ippcp/gsmod_montinv.c index 3c989807..19fe91bf 100644 --- a/sources/ippcp/gsmod_montinv.c +++ b/sources/ippcp/gsmod_montinv.c @@ -14,11 +14,11 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. Modular Arithmetic Engine. 
General Functionality -// +// // Contents: // gs_mont_inv() // @@ -31,7 +31,7 @@ #include "gsmodstuff.h" #include "pcpmask_ct.h" -__INLINE BNU_CHUNK_T* cpPow2_ct(int bit, BNU_CHUNK_T* dst, int len) +__IPPCP_INLINE BNU_CHUNK_T* cpPow2_ct(int bit, BNU_CHUNK_T* dst, int len) { int slot = bit/BNU_CHUNK_BITS; BNU_CHUNK_T value = (BNU_CHUNK_T)1 << (bit%BNU_CHUNK_BITS); diff --git a/sources/ippcp/gsmodmethod.h b/sources/ippcp/gsmodmethod.h index d9462215..e84b0526 100644 --- a/sources/ippcp/gsmodmethod.h +++ b/sources/ippcp/gsmodmethod.h @@ -54,17 +54,17 @@ typedef struct _gsModMethod { /* These functions should not be used, because they have non-constant execution time, see their safe analogues in pcpmask_ct.h */ #if 0 -__INLINE BNU_CHUNK_T cpIsZero(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsZero(BNU_CHUNK_T x) { return x==0; } -__INLINE BNU_CHUNK_T cpIsNonZero(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsNonZero(BNU_CHUNK_T x) { return x!=0; } -__INLINE BNU_CHUNK_T cpIsOdd(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsOdd(BNU_CHUNK_T x) { return x&1; } -__INLINE BNU_CHUNK_T cpIsEven(BNU_CHUNK_T x) +__IPPCP_INLINE BNU_CHUNK_T cpIsEven(BNU_CHUNK_T x) { return 1-cpIsOdd(x); } /* dst[] = (flag)? 
src[] : dst[] */ -__INLINE void cpMaskMove_gs(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T moveFlag) +__IPPCP_INLINE void cpMaskMove_gs(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T moveFlag) { BNU_CHUNK_T srcMask = 0-cpIsNonZero(moveFlag); BNU_CHUNK_T dstMask = ~srcMask; diff --git a/sources/ippcp/gsmodstuff.h b/sources/ippcp/gsmodstuff.h index 25de7105..bd17d129 100644 --- a/sources/ippcp/gsmodstuff.h +++ b/sources/ippcp/gsmodstuff.h @@ -101,7 +101,7 @@ typedef struct _gsModEngine // poolReq Required pool *F*/ -__INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) +__IPPCP_INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) { BNU_CHUNK_T* pPool = MOD_BUFFER(pME, pME->poolLenUsed); @@ -126,7 +126,7 @@ __INLINE BNU_CHUNK_T* gsModPoolAlloc(gsModEngine* pME, int poolReq) // poolReq Required pool *F*/ -__INLINE void gsModPoolFree(gsModEngine* pME, int poolReq) +__IPPCP_INLINE void gsModPoolFree(gsModEngine* pME, int poolReq) { if(pME->poolLenUsed < poolReq) poolReq = pME->poolLenUsed; diff --git a/sources/ippcp/ifma_exp52x20.c b/sources/ippcp/ifma_exp52x20.c index 9ad9f37c..2f186235 100644 --- a/sources/ippcp/ifma_exp52x20.c +++ b/sources/ippcp/ifma_exp52x20.c @@ -34,7 +34,7 @@ #define AMM ifma256_amm52x20 #define AMS ifma256_ams52x20 -__INLINE void extract_multiplier(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][LEN52], int red_table_idx) { diff --git a/sources/ippcp/ifma_exp52x20_dual.c b/sources/ippcp/ifma_exp52x20_dual.c index f6e5604a..fc69cd54 100644 --- a/sources/ippcp/ifma_exp52x20_dual.c +++ b/sources/ippcp/ifma_exp52x20_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x20_dual #define DAMS ifma256_ams52x20_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git 
a/sources/ippcp/ifma_exp52x30_dual.c b/sources/ippcp/ifma_exp52x30_dual.c index 73b42356..ed432581 100644 --- a/sources/ippcp/ifma_exp52x30_dual.c +++ b/sources/ippcp/ifma_exp52x30_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x30_dual #define DAMS ifma256_ams52x30_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git a/sources/ippcp/ifma_exp52x40_dual.c b/sources/ippcp/ifma_exp52x40_dual.c index a9da4bc0..2bf17ed9 100644 --- a/sources/ippcp/ifma_exp52x40_dual.c +++ b/sources/ippcp/ifma_exp52x40_dual.c @@ -34,7 +34,7 @@ #define DAMM ifma256_amm52x40_dual #define DAMS ifma256_ams52x40_dual -__INLINE void extract_multiplier_n(Ipp64u *red_Y, +__IPPCP_INLINE void extract_multiplier_n(Ipp64u *red_Y, const Ipp64u red_table[1U << EXP_WIN_SIZE][2][LEN52], int red_table_idx, int tbl_idx) { diff --git a/sources/ippcp/ifma_math_avx512vl.h b/sources/ippcp/ifma_math_avx512vl.h index c5c26426..1873671a 100644 --- a/sources/ippcp/ifma_math_avx512vl.h +++ b/sources/ippcp/ifma_math_avx512vl.h @@ -45,11 +45,11 @@ #define SIMD_BYTES (SIMD_LEN/8) #define SIMD_QWORDS (SIMD_LEN/64) - __INLINE U64 loadu64(const void *p) { + __IPPCP_INLINE U64 loadu64(const void *p) { return _mm256_loadu_si256((U64*)p); } - __INLINE void storeu64(const void *p, U64 v) { + __IPPCP_INLINE void storeu64(const void *p, U64 v) { _mm256_storeu_si256((U64*)p, v); } @@ -80,7 +80,7 @@ __asm__ ( "vpmadd52huq " #o "(%2), %1, %0" : "+x" (r): "x" (b), "r" (c) ); \ } #else - /* Use IFMA instrinsics for all other compilers */ + /* Use IFMA intrinsics for all other compilers */ static U64 fma52lo(U64 a, U64 b, U64 c) { return _mm256_madd52lo_epu64(a, b, c); @@ -102,7 +102,7 @@ } #endif - __INLINE U64 mul52lo(U64 b, U64 c) + __IPPCP_INLINE U64 mul52lo(U64 b, U64 c) { return fma52lo(_mm256_setzero_si256(), b, c); } @@ -110,44 +110,44 @@ #define fma52lo_mem(r, a, 
b, c, o) _mm_madd52lo_epu64_(r, a, b, c, o) #define fma52hi_mem(r, a, b, c, o) _mm_madd52hi_epu64_(r, a, b, c, o) - __INLINE U64 add64(U64 a, U64 b) + __IPPCP_INLINE U64 add64(U64 a, U64 b) { return _mm256_add_epi64(a, b); } - __INLINE U64 sub64(U64 a, U64 b) + __IPPCP_INLINE U64 sub64(U64 a, U64 b) { return _mm256_sub_epi64(a, b); } - __INLINE U64 get_zero64() + __IPPCP_INLINE U64 get_zero64() { return _mm256_setzero_si256(); } - __INLINE void set_zero64(U64 *a) + __IPPCP_INLINE void set_zero64(U64 *a) { *a = _mm256_xor_si256(*a, *a); } - __INLINE U64 set1(unsigned long long a) + __IPPCP_INLINE U64 set1(unsigned long long a) { return _mm256_set1_epi64x((long long)a); } - __INLINE U64 srli64(U64 a, int s) + __IPPCP_INLINE U64 srli64(U64 a, int s) { return _mm256_srli_epi64(a, s); } #define slli64 _mm256_slli_epi64 - __INLINE U64 and64_const(U64 a, unsigned long long mask) + __IPPCP_INLINE U64 and64_const(U64 a, unsigned long long mask) { return _mm256_and_si256(a, _mm256_set1_epi64x((long long)mask)); } - __INLINE U64 and64(U64 a, U64 mask) + __IPPCP_INLINE U64 and64(U64 a, U64 mask) { return _mm256_and_si256(a, mask); } diff --git a/sources/ippcp/ippcp.def b/sources/ippcp/ippcp.def index 9f66d9f5..6fbcc669 100644 --- a/sources/ippcp/ippcp.def +++ b/sources/ippcp/ippcp.def @@ -564,3 +564,9 @@ ippsXMSSSetSignatureState ippsXMSSSignatureStateGetSize ippsXMSSPublicKeyStateGetSize ippsXMSSBufferGetSize +ippsLMSBufferGetSize +ippsLMSSignatureStateGetSize +ippsLMSPublicKeyStateGetSize +ippsLMSSetPublicKeyState +ippsLMSSetSignatureState +ippsLMSVerify diff --git a/sources/ippcp/ippcp_fips_selftests.def b/sources/ippcp/ippcp_fips_selftests.def index 9d321e40..cdc59d2d 100644 --- a/sources/ippcp/ippcp_fips_selftests.def +++ b/sources/ippcp/ippcp_fips_selftests.def @@ -564,6 +564,12 @@ ippsXMSSSetSignatureState ippsXMSSSignatureStateGetSize ippsXMSSPublicKeyStateGetSize ippsXMSSBufferGetSize +ippsLMSBufferGetSize +ippsLMSSignatureStateGetSize +ippsLMSPublicKeyStateGetSize 
+ippsLMSSetPublicKeyState +ippsLMSSetSignatureState +ippsLMSVerify ippcp_is_fips_approved_func diff --git a/sources/ippcp/lms/lms_setters_getters.c b/sources/ippcp/lms/lms_setters_getters.c new file mode 100644 index 00000000..204bd912 --- /dev/null +++ b/sources/ippcp/lms/lms_setters_getters.c @@ -0,0 +1,301 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +#include "owndefs.h" +#include "lms_internal/lms.h" + +/*F* +// Name: ippsLMSBufferGetSize +// +// Purpose: Get the LMS temporary buffer size (bytes). 
+// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsLengthErr maxMessageLength < 1 +// maxMessageLength > (Ipp32s)(IPP_MAX_32S) - +// - (byteSizeI + 4(q byteSize) + 2(D_MESG byteSize) + n(C byteSize)) +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the work buffer's byte size +// maxMessageLength maximum length of the processing message +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ + +IPPFUN(IppStatus, ippsLMSBufferGetSize, (Ipp32s* pSize, Ipp32s maxMessageLength, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + /* Input parameters check */ + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Check message length */ + IPP_BADARG_RET(maxMessageLength < 1, ippStsLengthErr); + // this restriction is needed to avoid overflow of Ipp32s + // maxMessageLength must be less than IPP_MAX_32S - (CP_PK_I_BYTESIZE + q + D_MESG + C ) + IPP_BADARG_RET(maxMessageLength > (Ipp32s)((IPP_MAX_32S) - (CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n)), + ippStsLengthErr); + + /* Calculate the maximum Set LMOTS and LMS parameters */ + // pubKey->I || q || D_MESG || C || pMsg + Ipp32u lenBufQ = 
CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n + (Ipp32u)maxMessageLength; + // pubKey->I || q || i || j || Y[i] + Ipp32u lenBufTmp = CP_PK_I_BYTESIZE + 4 + 2 + 1 + lmotsParams.n; + // pubKey->I || node_num || D_LEAF || Kc + Ipp32u lenBufTc = CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n; + // pubKey->I || node_num/2 || D_INTR || path[i] || tmp + Ipp32u lenBufIntr = CP_PK_I_BYTESIZE + 4 + 2 + lmotsParams.n + lmotsParams.n; + + *pSize = (Ipp32s)IPP_MAX(IPP_MAX(IPP_MAX(lenBufQ, lenBufTmp), lenBufTc), lenBufIntr); + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSignatureStateGetSize +// +// Purpose: Get the LMS signature state size (bytes). +// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the size +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ + +IPPFUN(IppStatus, ippsLMSSignatureStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + *pSize = (Ipp32s)sizeof(IppsLMSSignatureState) + + (Ipp32s)(lmotsParams.n * lmsParams.h) + /*_pAuthPath*/ + (Ipp32s)lmotsParams.n + /* C */ + 
(Ipp32s)(lmotsParams.n * lmotsParams.p); /* Y */ + + return ippcpSts; +} + +/*F* +// Name: ippsLMSPublicKeyStateGetSize +// +// Purpose: Provides the LMS public key state size (bytes). +// +// Returns: Reason: +// ippStsNullPtrErr pSize == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// pSize pointer to the size +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// +*F*/ +IPPFUN(IppStatus, ippsLMSPublicKeyStateGetSize, (Ipp32s* pSize, const IppsLMSAlgoType lmsType)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR1_RET(pSize); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set LMS parameters */ + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + *pSize = (Ipp32s)sizeof(IppsLMSPublicKeyState) + + (Ipp32s)lmsParams.m; /* T1 */ + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSetPublicKeyState +// +// Purpose: Set LMS public key. 
+// +// Returns: Reason: +// ippStsNullPtrErr pI == NULL +// pK == NULL +// pState == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// ippStsNoErr no errors +// +// Parameters: +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// pI pointer to the LMS private key identifier +// pK pointer to the LMS public key +// pState pointer to the LMS public key state +// +*F*/ +IPPFUN(IppStatus, ippsLMSSetPublicKeyState, (const IppsLMSAlgoType lmsType, + const Ipp8u* pI, const Ipp8u* pK, + IppsLMSPublicKeyState* pState)) +{ + IppStatus ippcpSts = ippStsNoErr; + + IPP_BAD_PTR3_RET(pI, pK, pState); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + /* Set context id to prevent its copying */ + CP_LMS_SET_CTX_ID(pState); + + /* Set LMS parameters */ + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Fill in the structure */ + pState->lmsOIDAlgo = lmsType.lmsOIDAlgo; + pState->lmotsOIDAlgo = lmsType.lmotsOIDAlgo; + CopyBlock(pI, pState->I, CP_PK_I_BYTESIZE); + // Set pointer to T1 right to the end of the context + pState->T1 = (Ipp8u*)pState+sizeof(IppsLMSPublicKeyState); + CopyBlock(pK, pState->T1, (cpSize)lmsParams.m); + + return ippcpSts; +} + +/*F* +// Name: ippsLMSSetSignatureState +// +// Purpose: Set LMS signature. 
+// +// Returns: Reason: +// ippStsNullPtrErr pC == NULL +// pY == NULL +// pAuthPath == NULL +// pState == NULL +// ippStsBadArgErr lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8 +// lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1 +// lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25 +// lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5 +// q is incorrect +// ippStsNoErr no errors +// +// Parameters: +// lmsType structure with LMS parameters lmotsOIDAlgo and lmsOIDAlgo +// q index of LMS leaf +// pC pointer to the C LM-OTS value +// pY pointer to the y LM-OTS value +// pAuthPath pointer to the LMS authorization path +// pState pointer to the LMS signature state +// +*F*/ + +IPPFUN(IppStatus, ippsLMSSetSignatureState, (const IppsLMSAlgoType lmsType, + Ipp32u q, + const Ipp8u* pC, + const Ipp8u* pY, + const Ipp8u* pAuthPath, + IppsLMSSignatureState* pState)) +{ + IPP_BAD_PTR4_RET(pC, pY, pAuthPath, pState); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo > LMOTS_SHA256_N24_W8, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmotsOIDAlgo < LMOTS_SHA256_N32_W1, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo > LMS_SHA256_M24_H25, ippStsBadArgErr); + IPP_BADARG_RET(lmsType.lmsOIDAlgo < LMS_SHA256_M32_H5, ippStsBadArgErr); + + IppStatus ippcpSts = ippStsNoErr; + + /* Set LMOTS and LMS parameters */ + cpLMOTSParams lmotsParams; + ippcpSts = setLMOTSParams(lmsType.lmotsOIDAlgo, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + cpLMSParams lmsParams; + ippcpSts = setLMSParams(lmsType.lmsOIDAlgo, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Set context id to prevent its copying */ + CP_LMS_SET_CTX_ID(pState); + + /* Check q value before set */ + Ipp32u qLimit = 1 << lmsParams.h; + IPP_BADARG_RET(q >= qLimit, ippStsBadArgErr); + + pState->_q = q; + pState->_lmsOIDAlgo = lmsType.lmsOIDAlgo; + + _cpLMOTSSignatureState* locLMOTSSig = &(pState->_lmotsSig); + locLMOTSSig->_lmotsOIDAlgo = lmsType.lmotsOIDAlgo; + + // Copy auth path data + Ipp32s authPathSize = 
(Ipp32s)(lmsParams.h * lmotsParams.n); + pState->_pAuthPath = (Ipp8u*)pState+sizeof(IppsLMSSignatureState); + CopyBlock(pAuthPath, pState->_pAuthPath, authPathSize); + + // Copy C data + Ipp32s cSize = (Ipp32s)lmotsParams.n; + locLMOTSSig->pC = (Ipp8u*)pState->_pAuthPath+authPathSize; + CopyBlock(pC, locLMOTSSig->pC, cSize); + + // Copy Y data + Ipp32s ySize = (Ipp32s)(lmotsParams.n * lmotsParams.p); + locLMOTSSig->pY = (Ipp8u*)pState->_pAuthPath+authPathSize+cSize; + CopyBlock(pY, locLMOTSSig->pY, ySize); + + return ippcpSts; +} diff --git a/sources/ippcp/lms/lms_verify.c b/sources/ippcp/lms/lms_verify.c new file mode 100644 index 00000000..4c78a354 --- /dev/null +++ b/sources/ippcp/lms/lms_verify.c @@ -0,0 +1,223 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ +#include "lms_internal/lms.h" + +/*F* +// Name: ippsLMSVerify +// +// Purpose: LMS signature verification. 
+// +// Returns: Reason: +// ippStsNullPtrErr pMsg == NULL +// pSign == NULL +// pIsSignValid == NULL +// pKey == NULL +// pBuffer == NULL +// ippStsBadArgErr wrong LMS or LMOTS parameters +// inside pSign and pKey +// OR q is incorrect +// ippStsContextMatchErr pSign or pKey contexts are invalid +// ippStsLengthErr msgLen < 1 +// ippStsNoErr no errors +// +// Parameters: +// pMsg pointer to the message data buffer +// msgLen message buffer length, bytes +// pSign pointer to the LMS signature state +// pIsSignValid 1 if signature is valid, 0 - vice versa +// pKey pointer to the LMS public key state +// pBuffer pointer to the temporary memory +// +*F*/ + +IPPFUN(IppStatus, ippsLMSVerify, (const Ipp8u* pMsg, const Ipp32s msgLen, + const IppsLMSSignatureState* pSign, + int* pIsSignValid, + const IppsLMSPublicKeyState* pKey, + Ipp8u* pBuffer)) +{ + IppStatus ippcpSts = ippStsNoErr; + + /* Check if any of input pointers are NULL */ + IPP_BAD_PTR4_RET(pMsg, pSign, pIsSignValid, pKey) + /* Check if temporary buffer is NULL */ + IPP_BAD_PTR1_RET(pBuffer) + /* Check msg length */ + IPP_BADARG_RET(msgLen < 1, ippStsLengthErr) + IPP_BADARG_RET( !CP_LMS_VALID_CTX_ID(pSign), ippStsContextMatchErr ); + IPP_BADARG_RET( !CP_LMS_VALID_CTX_ID(pKey), ippStsContextMatchErr ); + *pIsSignValid = 0; + + /* Parse public key(Pk) */ + /* --------------------------------------------- */ + IppsLMSAlgo lmsTypePk = pKey->lmsOIDAlgo; + IppsLMOTSAlgo lmotsTypePk = pKey->lmotsOIDAlgo; + + // Set LMOTS and LMS parameters + cpLMOTSParams lmotsParams; + cpLMSParams lmsParams; + ippcpSts = setLMOTSParams(lmotsTypePk, &lmotsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + ippcpSts = setLMSParams(lmsTypePk, &lmsParams); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + Ipp32u nParam = lmotsParams.n; + Ipp32u wParam = lmotsParams.w; + Ipp32u pParam = lmotsParams.p; + Ipp32u hParam = lmsParams.h; + Ipp32u mParam = lmsParams.m; + + /* Parse signature */ + /* 
---------------------------------------------------- */ + Ipp32u q = pSign->_q; + _cpLMOTSSignatureState lmotsSig = pSign->_lmotsSig; + IppsLMOTSAlgo lmotsTypeSig = lmotsSig._lmotsOIDAlgo; + IppsLMSAlgo lmsTypeSig = pSign->_lmsOIDAlgo; + Ipp8u* pAuthPath = pSign->_pAuthPath; + + // Check the validity of the parsed signature parameters + Ipp32u qLimit = 1 << hParam; + if((lmsTypePk != lmsTypeSig) || (lmotsTypePk != lmotsTypeSig) || (q >= qLimit)) + { + return ippStsBadArgErr; + } + + /* Compute LMS pub key candidate (Algorithms 6a and 4b) */ + /* ---------------------------------------------------- */ + Ipp8u* tmpQBuf = pBuffer; + Ipp32u total_size = 0; + // Buffer's invariant for alg correctness - first 16 bytes is always pubKey->I + CopyBlock(pKey->I, tmpQBuf, CP_PK_I_BYTESIZE); total_size+=CP_PK_I_BYTESIZE; + toByte(tmpQBuf+total_size, /*q byteLen*/ 4, q); total_size += /*q byteLen*/ 4; + toByte(tmpQBuf+total_size, /*D_MESG byteLen*/ 2, D_MESG); total_size += /*D_MESG byteLen*/ 2; + CopyBlock(lmotsSig.pC, tmpQBuf+total_size, (cpSize)nParam); total_size += nParam; + CopyBlock(pMsg, tmpQBuf+total_size, msgLen); total_size += (Ipp32u)msgLen; + + // Q = H(I || u32str(q) || u16str(D_MESG) || C || message) + Ipp8u Q_CksmQ[CP_LMS_MAX_HASH_BYTESIZE+CP_CKSM_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(tmpQBuf, (int)total_size, Q_CksmQ, lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Calculate checksum Cksm(Q) and append it to Q */ + Ipp32u cksmQ = cpCksm(Q_CksmQ, lmotsParams); + toByte(Q_CksmQ+nParam, /*cksmQ byteLen*/2, cksmQ); + + Ipp8u z[CP_SIG_MAX_Y_WORDSIZE+1][CP_LMS_MAX_HASH_BYTESIZE]; + Ipp8u* pZ = z[0]; + + for(Ipp32u i = 0; i < pParam; i++) { + // a = coef(Q || Cksm(Q), i, w) + Ipp32u a = cpCoef(Q_CksmQ, i, wParam); + //tmp = y[i] + Ipp8u tmp[CP_LMS_MAX_HASH_BYTESIZE]; + CopyBlock(lmotsSig.pY + i*nParam, tmp, (cpSize)nParam); + + // I || u32str(q) + Ipp8u* tmpBuff = pBuffer; + // I || u32str(q) || u16str(i) + 
toByte(tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4,/*i byteLen*/2,i); + for(Ipp32u j = a; j < (Ipp32u)((1 << wParam) - 1); j++) { + // I || u32str(q) || u16str(i) || u8str(j) + toByte(tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2,/*j byteLen*/1,j); + // I || u32str(q) || u16str(i) || u8str(j) || tmp + CopyBlock(tmp, tmpBuff+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2+/*j byteLen*/1, (cpSize)nParam); + // tmp = H(I || u32str(q) || u16str(i) || u8str(j) || tmp) + ippcpSts = ippsHashMessage_rmf(tmpBuff, + (int)(CP_PK_I_BYTESIZE+/*q byteLen*/4+/*i byteLen*/2+/*j byteLen*/1+nParam), + tmp, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + } + CopyBlock(tmp, pZ+(i+1)*nParam, (cpSize)nParam); + } + // I u32str(q) u16str(D_PBLC) + Ipp32s zStartOffset = (Ipp32s)(nParam - (CP_PK_I_BYTESIZE + 4 + 2 )); + // I u16str(D_PBLC) + CopyBlock(tmpQBuf, pZ + zStartOffset, CP_PK_I_BYTESIZE + 4 ); + // Conduct operation u16str(D_PBLC) + toByte(pZ + nParam - /*D_PBLC byteLen*/2, /*D_PBLC byteLen*/2, D_PBLC); + // tmp = Kc = H(I || u32str(q) || u16str(D_PBLC) || z[0] || z[1] || ... 
|| z[p-1]) + Ipp8u Kc[CP_LMS_MAX_HASH_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(pZ+zStartOffset, + (int)(pParam*nParam+CP_PK_I_BYTESIZE+/*q byteLen*/4+/*D_PBLC byteLen*/2), + Kc, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + /* Compute the candidate LMS root value Tc */ + /* --------------------------------------------- */ + Ipp32u node_num = (1 << hParam) + q; + Ipp8u* tmpBuffKc = pBuffer; + // I || u32str(node_num) + toByte(tmpBuffKc+CP_PK_I_BYTESIZE, /*node_num byteLen*/4, node_num); + // I || u32str(node_num) || u16str(D_LEAF) + toByte(tmpBuffKc+CP_PK_I_BYTESIZE+/*node_num byteLen*/4, /*D_LEAF byteLen*/2, D_LEAF); + // I || u32str(node_num) || u16str(D_LEAF) || Kc + CopyBlock(Kc, tmpBuffKc+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_LEAF byteLen*/2, (cpSize)mParam); + Ipp8u tmp[CP_LMS_MAX_HASH_BYTESIZE]; + ippcpSts = ippsHashMessage_rmf(tmpBuffKc, + (int)(CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_LEAF byteLen*/2+mParam), + tmp, + lmsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + Ipp32u i = 0; + Ipp8u* locTmp = pBuffer; + // I || u32str(node_num/2) || u16str(D_INTR) + toByte(locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4, /*D_INTR byteLen*/2, D_INTR); + while (node_num > 1) { + // I || u32str(node_num/2) + toByte(locTmp+CP_PK_I_BYTESIZE, /*node_num byteLen*/4, node_num/2); + + if((node_num & 1) == 1) { + // I || u32str(node_num/2) || u16str(D_INTR) || path[i] + CopyBlock(pAuthPath+i*mParam, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2, + (cpSize)mParam); + // I || u32str(node_num/2) || u16str(D_INTR) || path[i] || tmp + CopyBlock(tmp, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+mParam, + (cpSize)mParam); + } + else { + // I || u32str(node_num/2) || u16str(D_INTR) || tmp + CopyBlock(tmp, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2, + (cpSize)mParam); + // I || u32str(node_num/2) || u16str(D_INTR) || tmp || path[i] + 
CopyBlock(pAuthPath+i*mParam, + locTmp+CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+mParam, + (cpSize)mParam); + } + + ippcpSts = ippsHashMessage_rmf(locTmp, + (int)(CP_PK_I_BYTESIZE+/*node_num byteLen*/4+/*D_INTR byteLen*/2+2*mParam), + tmp, + lmotsParams.hash_method); + IPP_BADARG_RET((ippStsNoErr != ippcpSts), ippcpSts) + + node_num = node_num >> 1; + i++; + } + + /* Verify with given public key */ + /* --------------------------------------------- */ + BNU_CHUNK_T is_equal = cpIsEquBlock_ct(pKey->T1, tmp, (int)mParam); + if(is_equal) { + *pIsSignValid = 1; + } + + return ippcpSts; +} diff --git a/sources/ippcp/owncp.h b/sources/ippcp/owncp.h index 962af354..ca5fb8fb 100644 --- a/sources/ippcp/owncp.h +++ b/sources/ippcp/owncp.h @@ -111,7 +111,7 @@ typedef int cpSize; #define LSR32(x,nBits) ((x)>>(nBits)) #define LSL32(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of WORD */ +/* Rotate (right and left) of WORD */ #if defined(_MSC_VER) && !defined( __ICL ) # include # define ROR32(x, nBits) _lrotr((x),(nBits)) @@ -125,7 +125,7 @@ typedef int cpSize; #define LSR64(x,nBits) ((x)>>(nBits)) #define LSL64(x,nBits) ((x)<<(nBits)) -/* Rorate (right and left) of DWORD */ +/* Rotate (right and left) of DWORD */ #define ROR64(x, nBits) (LSR64((x),(nBits)) | LSL64((x),64-(nBits))) #define ROL64(x, nBits) ROR64((x),(64-(nBits))) @@ -167,7 +167,7 @@ typedef int cpSize; /* test if library's feature is ON */ int cpGetFeature( Ipp64u Feature ); /* test CPU crypto features */ -__INLINE Ipp32u IsFeatureEnabled(Ipp64u niMmask) +__IPPCP_INLINE Ipp32u IsFeatureEnabled(Ipp64u niMmask) { return (Ipp32u)cpGetFeature(niMmask); } @@ -194,7 +194,7 @@ _mm_cvtsi64_si128(__int64 a) } #endif -#if !defined( __x86_64__ ) && defined(__GNUC__) +#if !defined( __x86_64__ ) && defined(__GNUC__) && (!defined(__clang__) || (__clang_major__ < 16)) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si128 (long long __A) { diff --git 
a/sources/ippcp/pcpaes_avx2_vaes.h b/sources/ippcp/pcpaes_avx2_vaes.h index 15f1866a..42701c6c 100644 --- a/sources/ippcp/pcpaes_avx2_vaes.h +++ b/sources/ippcp/pcpaes_avx2_vaes.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // AES GCM AVX2 // Internal Functions Implementations -// +// */ #ifndef __AES_GCM_AVX2_H_ @@ -33,11 +33,49 @@ #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) +#ifdef __GNUC__ +#define ASM(a) __asm__(a); +#else +#define ASM(a) +#endif + +/* +// Zeroes the memory by 32 bit parts, +// because "epi32" is the minimal available granularity for avx2 store instructions. +// input: +// Ipp32u* out - pointer to the memory that needs to be zeroize +// int len - length of the "out" array, in 32-bit chunks +*/ +static __NOINLINE +void zeroize_256(Ipp32u* out, int len) +{ +#if defined(__GNUC__) + // Avoid dead code elimination for GNU compilers + ASM(""); +#endif + __m256i T = _mm256_setzero_si256(); + int i; + int tmp[8]; + int rest = len % 8; + if (rest == 0) + for(i=0; i<8; i++) + tmp[i] = (int)0xFFFFFFFF; + else { + for(i=0; i holds the result of the carry-less multiplication of GH by HK - *GH = _mm_xor_si128(*GH, tmpX2); +__IPPCP_INLINE void reduction(__m128i *hash0, __m128i *hash1) { + __m128i T1, T2, T3; //first phase of the reduction - tmpX0 = *GH; //copy GH into tmpX0, tmpX2, tmpX3 - tmpX2 = *GH; - tmpX3 = *GH; - tmpX0 = _mm_slli_epi64 (tmpX0, 63); //packed left shifting << 63 - tmpX2 = _mm_slli_epi64 (tmpX2, 62); //packed left shifting shift << 62 - tmpX3 = _mm_slli_epi64 (tmpX3, 57); //packed left shifting shift << 57 - tmpX0 = _mm_xor_si128(tmpX0, tmpX2); //xor the shifted versions - tmpX0 = _mm_xor_si128(tmpX0, tmpX3); - tmpX2 = tmpX0; - tmpX2 = _mm_slli_si128 (tmpX2, 8); //shift-L tmpX2 2 DWs - tmpX0 = _mm_srli_si128 (tmpX0, 8); //shift-R xmm2 2 DWs - *GH = _mm_xor_si128(*GH, tmpX2); //first phase of the 
reduction complete - tmpX1 = _mm_xor_si128(tmpX1, tmpX0); //save the lost MS 1-2-7 bits from first phase + T1 = *hash1; //copy GH into T1, T2, T3 + T2 = *hash1; + T3 = *hash1; + T1 = _mm_slli_epi64 (T1, 63); //packed left shifting << 63 + T2 = _mm_slli_epi64 (T2, 62); //packed left shifting << 62 + T3 = _mm_slli_epi64 (T3, 57); //packed left shifting << 57 + T1 = _mm_xor_si128(T1, T2); //xor the shifted versions + T1 = _mm_xor_si128(T1, T3); + T2 = T1; + T2 = _mm_slli_si128 (T2, 8); //shift-L T2 2 DWs + T1 = _mm_srli_si128 (T1, 8); //shift-R T1 2 DWs + *hash1 = _mm_xor_si128(*hash1, T2); //first phase of the reduction complete + *hash0 = _mm_xor_si128(*hash0, T1); //save the lost MS 1-2-7 bits from first phase //second phase of the reduction - tmpX2 = *GH; - tmpX2 = _mm_srli_epi64(tmpX2, 5); //packed right shifting >> 5 - tmpX2 = _mm_xor_si128(tmpX2, *GH); //xor shifted versions - tmpX2 = _mm_srli_epi64(tmpX2, 1); //packed right shifting >> 1 - tmpX2 = _mm_xor_si128(tmpX2, *GH); //xor shifted versions - tmpX2 = _mm_srli_epi64(tmpX2, 1); //packed right shifting >> 1 - *GH = _mm_xor_si128(*GH, tmpX2); //second phase of the reduction complete - *GH = _mm_xor_si128(*GH, tmpX1); //the result is in GH + T2 = *hash1; + T2 = _mm_srli_epi64(T2, 5); //packed right shifting >> 5 + T2 = _mm_xor_si128(T2, *hash1); //xor shifted versions + T2 = _mm_srli_epi64(T2, 1); //packed right shifting >> 1 + T2 = _mm_xor_si128(T2, *hash1); //xor shifted versions + T2 = _mm_srli_epi64(T2, 1); //packed right shifting >> 1 + *hash1 = _mm_xor_si128(*hash1, T2); //second phase of the reduction complete } /* -// avx2_clmul_gcm performs clmul with 256-bit registers; is used in the hash calculation step +// avx2_clmul_gcm16 performs the hash calculation with 256-bit registers for 16 blocks +// GH order - 0, 1 | 2, 3 | 4, 5 | 6, 7 | 8, 9 | 10, 11 | 12, 13 | 14, 15 +// HK order - 1, 0 | 3, 2 | 5, 4 | 7, 6 | 9, 8 | 11, 10 | 13, 12 | 15, 14 // input: -// const __m128i *HK - contains hashed keys -// 
const __m256i *HKeyKaratsuba - contains temporary data for Karatsuba method -// const __m256i *mask_lo - contains mask for taking lower bits -// const __m256i *mask_hi - contains mask for taking higher bits +// const __m256i *HK - contains hashed keys // input/output: -// __m128i *GH - contains GHASH. Will be overwritten in this function +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] */ -__INLINE void avx2_clmul_gcm(__m256i *GH, const __m256i *HK, const __m256i *HKeyKaratsuba, const __m256i *mask_lo, const __m256i *mask_hi) { - __m256i tmpX0, tmpX1, tmpX2; - - tmpX2 = _mm256_shuffle_epi32 (*GH, SHUFD_MASK); - // Karatsuba Method - tmpX1 = *GH; - tmpX2 = _mm256_xor_si256(tmpX2, *GH); - *GH = _mm256_clmulepi64_epi128(*GH, *HK, 0x00); - // Karatsuba Method - - tmpX1 = _mm256_clmulepi64_epi128(tmpX1, *HK, 0x11); - tmpX2 = _mm256_clmulepi64_epi128(tmpX2, *HKeyKaratsuba, 0x00); - tmpX2 = _mm256_xor_si256(tmpX2, *GH); - tmpX2 = _mm256_xor_si256(tmpX2, tmpX1); - tmpX0 = _mm256_shuffle_epi32 (tmpX2, SHUFD_MASK); - tmpX2 = tmpX0; - tmpX0 = _mm256_and_si256(tmpX0, *mask_hi); - tmpX2 = _mm256_and_si256(tmpX2, *mask_lo); - *GH = _mm256_xor_si256(*GH, tmpX0); - tmpX1 = _mm256_xor_si256(tmpX1, tmpX2); - - // first phase of the reduction - tmpX0 = *GH; - *GH = _mm256_slli_epi64 (*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX0); - *GH = _mm256_slli_epi64 (*GH, 5); - *GH = _mm256_xor_si256(*GH, tmpX0); - *GH = _mm256_slli_epi64 (*GH, 57); - tmpX2 = _mm256_shuffle_epi32(*GH, SHUFD_MASK); - *GH = tmpX2; - tmpX2 = _mm256_and_si256(tmpX2, *mask_lo); - *GH = _mm256_and_si256(*GH, *mask_hi); - *GH = _mm256_xor_si256(*GH, tmpX0); - tmpX1 = _mm256_xor_si256(tmpX1, tmpX2); - - // second phase of the reduction - tmpX2 = *GH; - *GH = _mm256_srli_epi64(*GH, 5); - *GH = _mm256_xor_si256(*GH, tmpX2); - *GH = _mm256_srli_epi64(*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX2); - *GH = _mm256_srli_epi64(*GH, 1); - *GH = _mm256_xor_si256(*GH, tmpX2); - 
*GH = _mm256_xor_si256(*GH, tmpX1); +__IPPCP_INLINE __m128i avx2_clmul_gcm16(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[7], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[7]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[7], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[7], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[6], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[2], HK[5], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[3], HK[4], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[4], HK[3], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[5], HK[2], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[6], HK[1], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[7], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); } /* -// aes_encoder_avx2vaes_sb is used for single block encryption +// avx2_clmul_gcm8 performs the hash calculation with 256-bit registers for 8 blocks +// GH order - 0, 1 | 2, 3 
| 4, 5 | 6, 7 +// HK order - 1, 0 | 3, 2 | 5, 4 | 7, 6 // input: -// const Ipp8u *in - contains data for encryprion -// const int Nr - contains number of the rounds -// const __m256i* keys - contains keys +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function // output: -// Ipp8u *out - stores encrypted data. +// __m128i GH[0] */ -__INLINE void aes_encoder_avx2vaes_sb(const Ipp8u *in, Ipp8u *out, const int Nr, const __m256i* keys) { - __m128i lo = _mm_loadu_si128((void*)in); - __m128i hi = _mm_setzero_si128(); - __m256i block = _mm256_setr_m128i(lo, hi); - block = _mm256_xor_si256(block, *keys); - for(int round = 1; round < Nr; round++) { - keys++; - block = _mm256_aesenc_epi128(block, *keys); - } - keys++; - block = _mm256_aesenclast_epi128(block, *keys); - _mm_storeu_si128((void*)out, _mm256_castsi256_si128(block)); +__IPPCP_INLINE __m128i avx2_clmul_gcm8(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[3], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[3]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[3], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[3], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[2], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[2], HK[1], &tmpX0, &tmpX5)); + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[3], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = 
_mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm4 performs the hash calculation with 256-bit registers for 4 blocks +// GH order - 0, 1 | 2, 3 +// HK order - 1, 0 | 3, 2 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm4(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[1], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[1]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[1], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[1], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], avx2_internal_mul(GH[1], HK[0], &tmpX0, &tmpX5)); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm2 
performs the hash calculation with 256-bit registers for 2 blocks +// GH order - 0, 1 +// HK order - 1, 0 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm2(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[0], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[0]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm_xor_si128(_mm256_extractf128_si256(tmpX0, 0), _mm256_extractf128_si256(tmpX0, 1)); + T1 = _mm_xor_si128(_mm256_extractf128_si256(tmpX5, 0), _mm256_extractf128_si256(tmpX5, 1)); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); +} + +/* +// avx2_clmul_gcm performs the hash calculation with 256-bit registers for 1 blocks +// GH order - 0 +// HK order - 0 +// input: +// const __m256i *HK - contains hashed keys +// input/output: +// __m256i *GH - contains GHASH. 
Will be overwritten in this function +// output: +// __m128i GH[0] +*/ +__IPPCP_INLINE __m128i avx2_clmul_gcm(__m256i *GH, const __m256i *HK) { + __m256i tmpX0, tmpX2, tmpX3, tmpX4, tmpX5; + tmpX2 = _mm256_shuffle_epi32 (GH[0], SHUFD_MASK); + tmpX3 = _mm256_shuffle_epi32 (HK[0], SHUFD_MASK); + tmpX2 = _mm256_xor_si256(tmpX2, GH[0]); + tmpX3 = _mm256_xor_si256(tmpX3, HK[0]); + tmpX0 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x11); + tmpX5 = _mm256_clmulepi64_epi128 (GH[0], HK[0], 0x00); + GH[0] = _mm256_clmulepi64_epi128 (tmpX2, tmpX3, 0x00); + + GH[0] = _mm256_xor_si256(GH[0], tmpX0); + tmpX2 = _mm256_xor_si256(GH[0], tmpX5); + tmpX4 = _mm256_slli_si256(tmpX2, 8); + tmpX2 = _mm256_srli_si256(tmpX2, 8); + tmpX5 = _mm256_xor_si256(tmpX5, tmpX4); // + tmpX0 = _mm256_xor_si256(tmpX0, tmpX2); // tmpX0:tmpX5> holds the result of the accumulated carry-less multiplications + + __m128i T0, T1; + T0 = _mm256_extractf128_si256(tmpX0, 0); + T1 = _mm256_extractf128_si256(tmpX5, 0); + + // reduction phase + reduction(&T0, &T1); + + GH[0] = _mm256_setr_m128i(_mm_xor_si128(T1, T0), _mm_setzero_si128()); //the result is in GH + return _mm_xor_si128(T1, T0); } #endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_avx2_vaes_decrypt.c b/sources/ippcp/pcpaes_avx2_vaes_decrypt.c index 81a21d22..583de7f2 100644 --- a/sources/ippcp/pcpaes_avx2_vaes_decrypt.c +++ b/sources/ippcp/pcpaes_avx2_vaes_decrypt.c @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * -* http://www.apache.org/licenses/LICENSE-2.0 +* http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -32,181 +32,555 @@ IPP_OWN_DEFN (void, AesGcmDec_vaes_avx2, (Ipp8u* pDst, const Ipp8u* pSrc, int le if (len < 256) { IppsAESSpec* pAES = AESGCM_CIPHER(pState); RijnCipher encoder = RIJ_ENCODER(pAES); + Ipp8u hkeys_old_order[48]; + + // put the hash keys in the correct order (hKey*t, (hKey*t)^2, (hKey*t)^4) + for (int i = 0; i < 32; i++) { + *(hkeys_old_order+i) = *(AESGCM_HKEY(pState)+i); // HKEY 0-32 + if (i < 16) + *(hkeys_old_order+i+32) = *(AESGCM_HKEY(pState)+i+48); // HKEY 32-48 + } + AesGcmDec_avx(pDst, pSrc, len, encoder, RIJ_NR(pAES), RIJ_EKEYS(pAES), AESGCM_GHASH(pState), - AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), AESGCM_HKEY(pState)); + AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), hkeys_old_order); + + // zeroizing + zeroize_256((Ipp32u*)hkeys_old_order, 12); } else { - const int nloop = len / STEP_SIZE; IppsRijndael128Spec* pAES = AESGCM_CIPHER(pState); Ipp8u* pCounter = AESGCM_COUNTER(pState); Ipp8u* pECounter = AESGCM_ECOUNTER(pState); - __m256i pCounter256, pCounter256_1, pECounter256, pECounter256_1; - __m256i block, block1, cipherText, cipherText_1, plainText, plainText_1; + __m256i pCounter256, pCounter256_1, pCounter256_2, pCounter256_3, pCounter256_4, pCounter256_5, pCounter256_6, pCounter256_7; + __m256i block, block1, block2, block3, block4, block5, block6, block7; + __m256i cipherText, cipherText_1, cipherText_2, cipherText_3, cipherText_4, cipherText_5, cipherText_6, cipherText_7; + __m256i plainText, plainText_1, plainText_2, plainText_3, plainText_4, plainText_5, plainText_6, plainText_7; + __m256i rpHash[8]; + __m256i HashKey[8]; + __m128i resultHash = _mm_setzero_si128(); + __m256i tmpKey; // setting temporary data for incremention + const __m256i 
increment1 = _mm256_loadu_si256((void*)_increment1); // increment by 1 const __m256i increment2 = _mm256_loadu_si256((void*)_increment2); // increment by 2 - const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 - const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); + const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 + const __m256i increment8 = _mm256_loadu_si256((void*)_increment8); // increment by 8 + const __m256i increment16 = _mm256_loadu_si256((void*)_increment16); // increment by 16 + const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); - // vectors are used to zeroizing - __m128i zero_128 = _mm_setzero_si128(); + // vector is used to zeroizing __m256i zero_256 = _mm256_setzero_si256(); - // loading keys from memory - __m256i rkeys[MAX_NK]; - __m128i tmp_keys_128; - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) { - tmp_keys_128 = _mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+i*16)); - rkeys[i] = _mm256_setr_m128i(tmp_keys_128, tmp_keys_128); + // setting some masks + const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); + const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); + + // loading counters from memory + __m128i lo = _mm_loadu_si128((void*)pCounter); + IncrementCounter32(pCounter); + __m128i hi = _mm_loadu_si128((void*)pCounter); + pCounter256_7 = _mm256_setr_m128i(lo, hi); + pCounter256 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_1 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_2 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_3 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_4 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_5 = pCounter256_7; + IncrementRegister256(pCounter256_7, 
increment2, shuffle_mask); + pCounter256_6 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + + lo = _mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); + hi = _mm_setzero_si128(); + rpHash[0] = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); + + // setting hash keys + Ipp8u *pkeys = AESGCM_HKEY(pState); + for (int i = 0; i < 8; i++) { + HashKey[i] = _mm256_setr_m128i(_mm_loadu_si128((void*)(pkeys+16)), _mm_loadu_si128((void*)pkeys)); + pkeys += 32; } - // skip extra calculations if plaintext less than 4 blocks - if (nloop) { - // loading counters from memory - __m128i lo, hi; - lo = _mm_loadu_si128((void*)pCounter); - IncrementCounter32(pCounter); - hi = _mm_loadu_si128((void*)pCounter); - pCounter256_1 = _mm256_setr_m128i(lo, hi); - pCounter256 = pCounter256_1; - IncrementRegister256(pCounter256_1, increment2, shuffle_mask); - - // setting some masks - const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); - const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); - const __m256i mask_lo_256 = _mm256_loadu_si256((void*)_mask_lo_256); - const __m256i mask_hi_256 = _mm256_loadu_si256((void*)_mask_hi_256); - - lo = _mm_loadu_si128((void*)AESGCM_GHASH(pState)); - hi = _mm_setzero_si128(); - __m256i rpHash0 = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); - __m256i rpHash1 = _mm256_setzero_si256(); - - // setting pre-calculated data for hash combining - Ipp8u *pkeys = AESGCM_HKEY(pState); - __m128i HashKey0 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey2 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey4 = _mm_loadu_si128((void*)pkeys); - - // setting pre-calculated data in correct order for Karatsuba method - __m256i HKey = _mm256_setr_m128i(HashKey4, HashKey4); - __m256i HKeyKaratsuba = _mm256_shuffle_epi32(HKey, SHUFD_MASK); - HKeyKaratsuba = _mm256_xor_si256(HKey, HKeyKaratsuba); - do { - // decrypt stage - block = 
_mm256_xor_si256(pCounter256, *rkeys); - block1 = _mm256_xor_si256(pCounter256_1, *rkeys); - block = _mm256_aesenc_epi128(block, *(rkeys+1)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+1)); - block = _mm256_aesenc_epi128(block, *(rkeys+2)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+2)); - block = _mm256_aesenc_epi128(block, *(rkeys+3)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+3)); - IncrementRegister256(pCounter256, increment4, shuffle_mask); - block = _mm256_aesenc_epi128(block, *(rkeys+4)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+4)); - block = _mm256_aesenc_epi128(block, *(rkeys+5)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+5)); - block = _mm256_aesenc_epi128(block, *(rkeys+6)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+6)); - block = _mm256_aesenc_epi128(block, *(rkeys+7)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+7)); - block = _mm256_aesenc_epi128(block, *(rkeys+8)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+8)); - block = _mm256_aesenc_epi128(block, *(rkeys+9)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+9)); - IncrementRegister256(pCounter256_1, increment4, shuffle_mask); - if (RIJ_NR(pAES) >= 12) { - block = _mm256_aesenc_epi128(block, *(rkeys+10)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+10)); - block = _mm256_aesenc_epi128(block, *(rkeys+11)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+11)); - if (RIJ_NR(pAES) >= 14) { - block = _mm256_aesenc_epi128(block, *(rkeys+12)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+12)); - block = _mm256_aesenc_epi128(block, *(rkeys+13)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+13)); - } + while(len >= 16*BLOCK_SIZE) { + // decrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + block4 = 
_mm256_xor_si256(pCounter256_4, tmpKey); + block5 = _mm256_xor_si256(pCounter256_5, tmpKey); + block6 = _mm256_xor_si256(pCounter256_6, tmpKey); + block7 = _mm256_xor_si256(pCounter256_7, tmpKey); + IncrementRegister256(pCounter256, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_1, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_2, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_3, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_4, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_5, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_6, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = 
_mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_7, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + 
block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); } - pECounter256 = _mm256_aesenclast_epi128(block, *(rkeys+RIJ_NR(pAES))); - pECounter256_1 = _mm256_aesenclast_epi128(block1, *(rkeys+RIJ_NR(pAES))); - - // set ciphertext - plainText = _mm256_loadu_si256((void*)pSrc); - cipherText = _mm256_xor_si256(plainText, pECounter256); - pSrc += HALF_STEP_SIZE; - plainText_1 = _mm256_loadu_si256((void*)pSrc); - cipherText_1 = _mm256_xor_si256(plainText_1, pECounter256_1); - pSrc += HALF_STEP_SIZE; - - // hash calculation stage - rpHash0 = _mm256_xor_si256(rpHash0, _mm256_shuffle_epi8(plainText, shuff_mask_256)); - _mm256_storeu_si256((void*)pDst, cipherText); - pDst += HALF_STEP_SIZE; - _mm256_storeu_si256((void*)pDst, cipherText_1); - pDst += HALF_STEP_SIZE; - rpHash1 = _mm256_xor_si256(rpHash1, _mm256_shuffle_epi8(plainText_1, shuff_mask_256)); - len -= STEP_SIZE; - if (len >= STEP_SIZE) { - avx2_clmul_gcm(&rpHash0, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); - avx2_clmul_gcm(&rpHash1, &HKey, 
&HKeyKaratsuba, &mask_lo_256, &mask_hi_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + block4 = _mm256_aesenclast_epi128(block4, tmpKey); + block5 = _mm256_aesenclast_epi128(block5, tmpKey); + block6 = _mm256_aesenclast_epi128(block6, tmpKey); + block7 = _mm256_aesenclast_epi128(block7, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + plainText_2 = _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)); + cipherText_2 = _mm256_xor_si256(plainText_2, block2); + plainText_3 = _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)); + cipherText_3 = _mm256_xor_si256(plainText_3, block3); + plainText_4 = _mm256_loadu_si256((void*)(pSrc+8*BLOCK_SIZE)); + cipherText_4 = _mm256_xor_si256(plainText_4, block4); + plainText_5 = _mm256_loadu_si256((void*)(pSrc+10*BLOCK_SIZE)); + cipherText_5 = _mm256_xor_si256(plainText_5, block5); + plainText_6 = _mm256_loadu_si256((void*)(pSrc+12*BLOCK_SIZE)); + cipherText_6 = _mm256_xor_si256(plainText_6, block6); + plainText_7 = _mm256_loadu_si256((void*)(pSrc+14*BLOCK_SIZE)); + cipherText_7 = _mm256_xor_si256(plainText_7, block7); + pSrc += 16*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + _mm256_storeu_si256((void*)(pDst+8*BLOCK_SIZE), cipherText_4); + _mm256_storeu_si256((void*)(pDst+10*BLOCK_SIZE), cipherText_5); + _mm256_storeu_si256((void*)(pDst+12*BLOCK_SIZE), cipherText_6); + 
_mm256_storeu_si256((void*)(pDst+14*BLOCK_SIZE), cipherText_7); + pDst += 16*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(plainText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(plainText_3, shuff_mask_256); + rpHash[4] = _mm256_shuffle_epi8(plainText_4, shuff_mask_256); + rpHash[5] = _mm256_shuffle_epi8(plainText_5, shuff_mask_256); + rpHash[6] = _mm256_shuffle_epi8(plainText_6, shuff_mask_256); + rpHash[7] = _mm256_shuffle_epi8(plainText_7, shuff_mask_256); + resultHash = avx2_clmul_gcm16(rpHash, HashKey); + + len -= 16*BLOCK_SIZE; + } // while(len >= 16*BLOCK_SIZE) + + if (len >= 8*BLOCK_SIZE) { + // decrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = 
_mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_1, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_2, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_3, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); } - } while(len >= STEP_SIZE); - - // loading temporary data to memory - _mm_storeu_si128((void*)pECounter, _mm256_extractf128_si256(pECounter256, 1)); - _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); - - // combine hash - __m128i GHash0 = _mm256_extractf128_si256(rpHash0, 0); - __m128i GHash1 = _mm256_extractf128_si256(rpHash0, 1); - __m128i GHash2 = _mm256_extractf128_si256(rpHash1, 0); - __m128i GHash3 = _mm256_extractf128_si256(rpHash1, 1); - - sse_clmul_gcm(&GHash0, &HashKey4); //GHash0 = GHash0 * (HashKey^4)<<1 mod poly - sse_clmul_gcm(&GHash1, &HashKey2); //GHash1 = GHash1 * (HashKey^2)<<1 mod poly - sse_clmul_gcm(&GHash2, &HashKey0); //GHash2 = GHash2 * (HashKey^1)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash1); - GHash3 = _mm_xor_si128(GHash3, GHash2); - - sse_clmul_gcm(&GHash3, &HashKey0); //GHash3 
= GHash3 * (HashKey)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash0); - GHash3 = _mm_shuffle_epi8(GHash3, shuff_mask_128); - _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), GHash3); - - // HKeys zeroizing - _mm_storeu_si128(&HashKey0, zero_128); - _mm_storeu_si128(&HashKey2, zero_128); - _mm_storeu_si128(&HashKey4, zero_128); - _mm256_storeu_si256(&HKey, zero_256); - _mm256_storeu_si256(&HKeyKaratsuba, zero_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + plainText_2 = _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)); + cipherText_2 = _mm256_xor_si256(plainText_2, block2); + plainText_3 = _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)); + cipherText_3 = _mm256_xor_si256(plainText_3, block3); + pSrc += 8*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + pDst += 8*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(plainText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(plainText_3, shuff_mask_256); + resultHash = avx2_clmul_gcm8(rpHash, HashKey); + + len -= 8*BLOCK_SIZE; + } //if (len >= 8*BLOCK_SIZE) + + if (len >= 4*BLOCK_SIZE) { + // decrypt stage + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256_1, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + plainText_1 = _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)); + cipherText_1 = _mm256_xor_si256(plainText_1, block1); + pSrc += 4*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + pDst += 4*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(plainText_1, shuff_mask_256); + resultHash = avx2_clmul_gcm4(rpHash, HashKey); + len -= 4*BLOCK_SIZE; + } //if (len >= 4*BLOCK_SIZE) + + if (len >= 2*BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment2, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); + + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + resultHash = avx2_clmul_gcm2(rpHash, HashKey); 
+ len -= 2*BLOCK_SIZE; } - const Ipp8u* pHashedData = pSrc; - int hashedDataLen = len; + // encryption for the tail (1-3 block) + while (len >= BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment1, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // decryption for the tail (1-3 blocks) - while(len >= BLOCK_SIZE) { - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - XorBlock16(pSrc, pECounter, pDst); + // set ciphertext + plainText = _mm256_loadu_si256((void*)pSrc); + cipherText = _mm256_xor_si256(plainText, block); pSrc += BLOCK_SIZE; + _mm_storeu_si128((void*)pDst, _mm256_castsi256_si128(cipherText)); pDst += BLOCK_SIZE; + // hash calculation stage + HashKey[0] = _mm256_setr_m128i(_mm_loadu_si128((void*)(AESGCM_HKEY(pState))), _mm_loadu_si128((void*)(AESGCM_HKEY(pState)))); + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(plainText, shuff_mask_256)); + resultHash = avx2_clmul_gcm(rpHash, HashKey); len -= BLOCK_SIZE; - IncrementCounter32(pCounter); } - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - // hash calculation for the tail (1-3 blocks) - if (hashedDataLen >= BLOCK_SIZE) - AesGcmAuth_avx(AESGCM_GHASH(pState), pHashedData, hashedDataLen, AESGCM_HKEY(pState), AesGcmConst_table); + //decrypt the remainder + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // keys zeroizing - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) - _mm256_storeu_si256((rkeys+i), zero_256); - _mm_storeu_si128(&tmp_keys_128, zero_128); - } -} + // loand data to the memory + _mm_storeu_si128((void*)pECounter, _mm256_castsi256_si128(block)); + _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); + resultHash = _mm_shuffle_epi8(resultHash, shuff_mask_128); + _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), resultHash); + // HKeys zeroizing + for (int i = 0; i < 8; i++) + _mm256_storeu_si256((HashKey+i), zero_256); + tmpKey = _mm256_setzero_si256(); + } // if (len < 256) +} #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_avx2_vaes_encrypt.c b/sources/ippcp/pcpaes_avx2_vaes_encrypt.c index 8c3b93de..2c6f8bf1 100644 --- a/sources/ippcp/pcpaes_avx2_vaes_encrypt.c +++ b/sources/ippcp/pcpaes_avx2_vaes_encrypt.c @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * -* http://www.apache.org/licenses/LICENSE-2.0 +* http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -32,179 +32,546 @@ IPP_OWN_DEFN (void, AesGcmEnc_vaes_avx2, (Ipp8u* pDst, const Ipp8u* pSrc, int le if (len < 256) { IppsAESSpec* pAES = AESGCM_CIPHER(pState); RijnCipher encoder = RIJ_ENCODER(pAES); + Ipp8u hkeys_old_order[48]; + + // put the hash keys in the correct order (hKey*t, (hKey*t)^2, (hKey*t)^4) + for (int i = 0; i < 32; i++) { + *(hkeys_old_order+i) = *(AESGCM_HKEY(pState)+i); // HKEY 0-32 + if (i < 16) + *(hkeys_old_order+i+32) = *(AESGCM_HKEY(pState)+i+48); // HKEY 32-48 + } + AesGcmEnc_avx(pDst, pSrc, len, encoder, RIJ_NR(pAES), RIJ_EKEYS(pAES), AESGCM_GHASH(pState), - AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), AESGCM_HKEY(pState)); + AESGCM_COUNTER(pState), AESGCM_ECOUNTER(pState), hkeys_old_order); + + // zeroizing + zeroize_256((Ipp32u*)hkeys_old_order, 12); } else { - const int nloop = len / STEP_SIZE; IppsRijndael128Spec* pAES = AESGCM_CIPHER(pState); Ipp8u* pCounter = AESGCM_COUNTER(pState); Ipp8u* pECounter = AESGCM_ECOUNTER(pState); - __m256i pCounter256, pCounter256_1, pECounter256, pECounter256_1; - __m256i block, block1, cipherText, cipherText_1; + __m256i pCounter256, pCounter256_1, pCounter256_2, pCounter256_3, pCounter256_4, pCounter256_5, pCounter256_6, pCounter256_7; + __m256i block, block1, block2, block3, block4, block5, block6, block7; + __m256i cipherText, cipherText_1, cipherText_2, cipherText_3, cipherText_4, cipherText_5, cipherText_6, cipherText_7; + __m256i rpHash[8]; + __m256i HashKey[8]; + __m128i resultHash = _mm_setzero_si128(); + __m256i tmpKey; // setting temporary data for incremention + const __m256i increment1 = _mm256_loadu_si256((void*)_increment1); // increment by 1 const __m256i increment2 = _mm256_loadu_si256((void*)_increment2); // 
increment by 2 - const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 - const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); + const __m256i increment4 = _mm256_loadu_si256((void*)_increment4); // increment by 4 + const __m256i increment8 = _mm256_loadu_si256((void*)_increment8); // increment by 8 + const __m256i increment16 = _mm256_loadu_si256((void*)_increment16); // increment by 16 + const __m256i shuffle_mask = _mm256_loadu_si256((void*)swapBytes256); - // vectors are used to zeroizing - __m128i zero_128 = _mm_setzero_si128(); + // vector is used to zeroizing __m256i zero_256 = _mm256_setzero_si256(); - // loading keys from memory - __m256i rkeys[MAX_NK]; - __m128i tmp_keys_128; - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) { - tmp_keys_128 = _mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+i*16)); - rkeys[i] = _mm256_setr_m128i(tmp_keys_128, tmp_keys_128); + // setting some masks + const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); + const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); + + // loading counters from memory + __m128i lo = _mm_loadu_si128((void*)pCounter); + IncrementCounter32(pCounter); + __m128i hi = _mm_loadu_si128((void*)pCounter); + pCounter256_7 = _mm256_setr_m128i(lo, hi); + pCounter256 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_1 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_2 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_3 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_4 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_5 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + pCounter256_6 = pCounter256_7; + IncrementRegister256(pCounter256_7, increment2, shuffle_mask); + + lo = 
_mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); + hi = _mm_setzero_si128(); + rpHash[0] = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); + + // setting hash keys + Ipp8u *pkeys = AESGCM_HKEY(pState); + for (int i = 0; i < 8; i++) { + HashKey[i] = _mm256_setr_m128i(_mm_loadu_si128((void*)(pkeys+16)), _mm_loadu_si128((void*)pkeys)); + pkeys += 32; } - // skip extra calculations if plaintext less than 4 blocks - if (nloop) { - // loading counters from memory - __m128i lo, hi; - lo = _mm_loadu_si128((void*)pCounter); - IncrementCounter32(pCounter); - hi = _mm_loadu_si128((void*)pCounter); - pCounter256_1 = _mm256_setr_m128i(lo, hi); - pCounter256 = pCounter256_1; - IncrementRegister256(pCounter256_1, increment2, shuffle_mask); - - // setting some masks - const __m128i shuff_mask_128 = _mm_loadu_si128((void*)_shuff_mask_128); - const __m256i shuff_mask_256 = _mm256_loadu_si256((void*)_shuff_mask_256); - const __m256i mask_lo_256 = _mm256_loadu_si256((void*)_mask_lo_256); - const __m256i mask_hi_256 = _mm256_loadu_si256((void*)_mask_hi_256); - - lo = _mm_loadu_si128((__m128i*)AESGCM_GHASH(pState)); - hi = _mm_setzero_si128(); - __m256i rpHash0 = _mm256_setr_m128i(_mm_shuffle_epi8(lo, shuff_mask_128), hi); - __m256i rpHash1 = _mm256_setzero_si256(); - - // setting pre-calculated data for hash combining - Ipp8u *pkeys = AESGCM_HKEY(pState); - __m128i HashKey0 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey2 = _mm_loadu_si128((void*)pkeys); - pkeys += 16; - __m128i HashKey4 = _mm_loadu_si128((void*)pkeys); - - // setting pre-calculated data in correct order for Karatsuba method - __m256i HKey = _mm256_setr_m128i(HashKey4, HashKey4); - __m256i HKeyKaratsuba = _mm256_shuffle_epi32(HKey, SHUFD_MASK); - HKeyKaratsuba = _mm256_xor_si256(HKey, HKeyKaratsuba); - do { - // encrypt stage - block = _mm256_xor_si256(pCounter256, *rkeys); - block1 = _mm256_xor_si256(pCounter256_1, *rkeys); - block = _mm256_aesenc_epi128(block, *(rkeys+1)); - block1 = 
_mm256_aesenc_epi128(block1, *(rkeys+1)); - block = _mm256_aesenc_epi128(block, *(rkeys+2)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+2)); - block = _mm256_aesenc_epi128(block, *(rkeys+3)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+3)); - IncrementRegister256(pCounter256, increment4, shuffle_mask); - block = _mm256_aesenc_epi128(block, *(rkeys+4)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+4)); - block = _mm256_aesenc_epi128(block, *(rkeys+5)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+5)); - block = _mm256_aesenc_epi128(block, *(rkeys+6)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+6)); - block = _mm256_aesenc_epi128(block, *(rkeys+7)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+7)); - block = _mm256_aesenc_epi128(block, *(rkeys+8)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+8)); - block = _mm256_aesenc_epi128(block, *(rkeys+9)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+9)); - IncrementRegister256(pCounter256_1, increment4, shuffle_mask); - if (RIJ_NR(pAES) >= 12) { - block = _mm256_aesenc_epi128(block, *(rkeys+10)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+10)); - block = _mm256_aesenc_epi128(block, *(rkeys+11)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+11)); - if (RIJ_NR(pAES) >= 14) { - block = _mm256_aesenc_epi128(block, *(rkeys+12)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+12)); - block = _mm256_aesenc_epi128(block, *(rkeys+13)); - block1 = _mm256_aesenc_epi128(block1, *(rkeys+13)); - } - } - pECounter256 = _mm256_aesenclast_epi128(block, *(rkeys+RIJ_NR(pAES))); - pECounter256_1 = _mm256_aesenclast_epi128(block1, *(rkeys+RIJ_NR(pAES))); - - // set ciphertext - cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), pECounter256); - pSrc += HALF_STEP_SIZE; - cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), pECounter256_1); - pSrc += HALF_STEP_SIZE; - - // hash calculation stage - rpHash0 = _mm256_xor_si256(rpHash0, _mm256_shuffle_epi8(cipherText, shuff_mask_256)); - 
_mm256_storeu_si256((void*)pDst, cipherText); - pDst += HALF_STEP_SIZE; - _mm256_storeu_si256((void*)pDst, cipherText_1); - pDst += HALF_STEP_SIZE; - rpHash1 = _mm256_xor_si256(rpHash1, _mm256_shuffle_epi8(cipherText_1, shuff_mask_256)); - len -= STEP_SIZE; - if (len >= STEP_SIZE) { - avx2_clmul_gcm(&rpHash0, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); - avx2_clmul_gcm(&rpHash1, &HKey, &HKeyKaratsuba, &mask_lo_256, &mask_hi_256); + while(len >= 16*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + block4 = _mm256_xor_si256(pCounter256_4, tmpKey); + block5 = _mm256_xor_si256(pCounter256_5, tmpKey); + block6 = _mm256_xor_si256(pCounter256_6, tmpKey); + block7 = _mm256_xor_si256(pCounter256_7, tmpKey); + IncrementRegister256(pCounter256, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_1, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = 
_mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_2, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_3, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_4, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_5, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = 
_mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_6, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + IncrementRegister256(pCounter256_7, increment16, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + block4 = _mm256_aesenc_epi128(block4, tmpKey); + block5 = _mm256_aesenc_epi128(block5, tmpKey); + block6 = _mm256_aesenc_epi128(block6, tmpKey); + block7 = _mm256_aesenc_epi128(block7, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, 
tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + block4 = _mm256_aesenclast_epi128(block4, tmpKey); + block5 = _mm256_aesenclast_epi128(block5, tmpKey); + block6 = _mm256_aesenclast_epi128(block6, tmpKey); + block7 = _mm256_aesenclast_epi128(block7, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+2*BLOCK_SIZE)), block1); + cipherText_2 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+4*BLOCK_SIZE)), block2); + cipherText_3 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+6*BLOCK_SIZE)), block3); + cipherText_4 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+8*BLOCK_SIZE)), block4); + cipherText_5 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+10*BLOCK_SIZE)), block5); + cipherText_6 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+12*BLOCK_SIZE)), block6); + cipherText_7 = _mm256_xor_si256( _mm256_loadu_si256((void*)(pSrc+14*BLOCK_SIZE)), block7); + pSrc += 16*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + _mm256_storeu_si256((void*)(pDst+2*BLOCK_SIZE), cipherText_1); + _mm256_storeu_si256((void*)(pDst+4*BLOCK_SIZE), cipherText_2); + _mm256_storeu_si256((void*)(pDst+6*BLOCK_SIZE), cipherText_3); + _mm256_storeu_si256((void*)(pDst+8*BLOCK_SIZE), cipherText_4); + _mm256_storeu_si256((void*)(pDst+10*BLOCK_SIZE), cipherText_5); + _mm256_storeu_si256((void*)(pDst+12*BLOCK_SIZE), cipherText_6); + _mm256_storeu_si256((void*)(pDst+14*BLOCK_SIZE), cipherText_7); + pDst += 16*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(cipherText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(cipherText_3, shuff_mask_256); + 
rpHash[4] = _mm256_shuffle_epi8(cipherText_4, shuff_mask_256); + rpHash[5] = _mm256_shuffle_epi8(cipherText_5, shuff_mask_256); + rpHash[6] = _mm256_shuffle_epi8(cipherText_6, shuff_mask_256); + rpHash[7] = _mm256_shuffle_epi8(cipherText_7, shuff_mask_256); + resultHash = avx2_clmul_gcm16(rpHash, HashKey); + + len -= 16*BLOCK_SIZE; + } // while(len >= 16*BLOCK_SIZE) + + if (len >= 8*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + block2 = _mm256_xor_si256(pCounter256_2, tmpKey); + block3 = _mm256_xor_si256(pCounter256_3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_1, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_2, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + IncrementRegister256(pCounter256_3, increment8, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = 
_mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + block2 = _mm256_aesenc_epi128(block2, tmpKey); + block3 = _mm256_aesenc_epi128(block3, tmpKey); } - } while(len >= STEP_SIZE); - - // loading temporary data to memory - _mm_storeu_si128((void*)pECounter, _mm256_extractf128_si256(pECounter256, 1)); - _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); - - // combine hash - __m128i GHash0 = _mm256_extractf128_si256(rpHash0, 0); - __m128i GHash1 = _mm256_extractf128_si256(rpHash0, 1); - __m128i GHash2 = _mm256_extractf128_si256(rpHash1, 0); - __m128i GHash3 = _mm256_extractf128_si256(rpHash1, 1); - - sse_clmul_gcm(&GHash0, &HashKey4); //GHash0 = GHash0 * (HashKey^4)<<1 mod poly - sse_clmul_gcm(&GHash1, &HashKey2); //GHash1 = GHash1 * (HashKey^2)<<1 mod poly - sse_clmul_gcm(&GHash2, &HashKey0); //GHash2 = GHash2 * (HashKey^1)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash1); - GHash3 = _mm_xor_si128(GHash3, GHash2); - - sse_clmul_gcm(&GHash3, &HashKey0); //GHash3 = GHash3 * (HashKey)<<1 mod poly - GHash3 = _mm_xor_si128(GHash3, GHash0); - GHash3 = _mm_shuffle_epi8(GHash3, shuff_mask_128); - _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), GHash3); - - // HKeys zeroizing - _mm_storeu_si128(&HashKey0, zero_128); - _mm_storeu_si128(&HashKey2, zero_128); - _mm_storeu_si128(&HashKey4, zero_128); - _mm256_storeu_si256(&HKey, zero_256); - _mm256_storeu_si256(&HKeyKaratsuba, 
zero_256); + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + block2 = _mm256_aesenclast_epi128(block2, tmpKey); + block3 = _mm256_aesenclast_epi128(block3, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block1); + pSrc += 2*BLOCK_SIZE; + cipherText_2 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block2); + pSrc += 2*BLOCK_SIZE; + cipherText_3 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block3); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_1); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_2); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_3); + pDst += 2*BLOCK_SIZE; + + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + rpHash[2] = _mm256_shuffle_epi8(cipherText_2, shuff_mask_256); + rpHash[3] = _mm256_shuffle_epi8(cipherText_3, shuff_mask_256); + resultHash = avx2_clmul_gcm8(rpHash, HashKey); + + len -= 8*BLOCK_SIZE; + } //if (len >= 8*BLOCK_SIZE) + + if (len >= 4*BLOCK_SIZE) { + // encrypt stage + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)))); + block = _mm256_xor_si256(pCounter256, tmpKey); + block1 = _mm256_xor_si256(pCounter256_1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16))); + block = 
_mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + IncrementRegister256(pCounter256_1, increment4, shuffle_mask); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 12) { + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + if (RIJ_NR(pAES) >= 14) { + tmpKey = 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16))); + block = _mm256_aesenc_epi128(block, tmpKey); + block1 = _mm256_aesenc_epi128(block1, tmpKey); + } + } + tmpKey = _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16))); + block = _mm256_aesenclast_epi128(block, tmpKey); + block1 = _mm256_aesenclast_epi128(block1, tmpKey); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + cipherText_1 = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block1); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText_1); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + rpHash[1] = _mm256_shuffle_epi8(cipherText_1, shuff_mask_256); + resultHash = avx2_clmul_gcm4(rpHash, HashKey); + len -= 4*BLOCK_SIZE; + } //if (len >= 4*BLOCK_SIZE) + + if (len >= 2*BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment2, shuffle_mask); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); + + // set ciphertext + cipherText = _mm256_xor_si256( _mm256_loadu_si256((void*)pSrc), block); + pSrc += 2*BLOCK_SIZE; + _mm256_storeu_si256((void*)pDst, cipherText); + pDst += 2*BLOCK_SIZE; + // hash calculation stage + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + resultHash = avx2_clmul_gcm2(rpHash, HashKey); + len -= 2*BLOCK_SIZE; } - Ipp8u* pHashedData = pDst; - int hashedDataLen = len; + // encryption for the tail (1-3 blocks) + while (len >= BLOCK_SIZE) { + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + IncrementRegister256(pCounter256, increment1, shuffle_mask); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // encryption for the tail (1-3 blocks) - while(len >= BLOCK_SIZE) { - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - XorBlock16(pSrc, pECounter, pDst); + // set ciphertext + cipherText = _mm256_xor_si256(_mm256_loadu_si256((void*)pSrc), block); pSrc += BLOCK_SIZE; + _mm_storeu_si128((void*)pDst, _mm256_castsi256_si128(cipherText)); pDst += BLOCK_SIZE; + // 
hash calculation stage + HashKey[0] = _mm256_setr_m128i(_mm_loadu_si128((void*)(AESGCM_HKEY(pState))), _mm_loadu_si128((void*)(AESGCM_HKEY(pState)))); + rpHash[0] = _mm256_xor_si256(rpHash[0], _mm256_shuffle_epi8(cipherText, shuff_mask_256)); + resultHash = avx2_clmul_gcm(rpHash, HashKey); len -= BLOCK_SIZE; - IncrementCounter32(pCounter); } - aes_encoder_avx2vaes_sb(pCounter, pECounter, RIJ_NR(pAES), rkeys); - // hash calculation for the tail (1-3 blocks) - if (hashedDataLen >= BLOCK_SIZE) - AesGcmAuth_avx(AESGCM_GHASH(pState), pHashedData, hashedDataLen, AESGCM_HKEY(pState), AesGcmConst_table); + //encrypt the remainder + block = _mm256_xor_si256(pCounter256, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES))))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+1*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+2*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+3*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+4*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+5*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+6*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+7*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+8*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+9*16)))); + if (RIJ_NR(pAES) >= 12) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+10*16)))); + block = _mm256_aesenc_epi128(block, 
_mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+11*16)))); + if (RIJ_NR(pAES) >= 14) { + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+12*16)))); + block = _mm256_aesenc_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+13*16)))); + } + } + block = _mm256_aesenclast_epi128(block, _mm256_broadcastsi128_si256(_mm_loadu_si128((void*)(RIJ_EKEYS(pAES)+RIJ_NR(pAES)*16)))); - // keys zeroizing - for (int i = 0; i < RIJ_NR(pAES) + 1; i++) - _mm256_storeu_si256((rkeys+i), zero_256); - _mm_storeu_si128(&tmp_keys_128, zero_128); - } -} + // load data to the memory + _mm_storeu_si128((void*)pECounter, _mm256_castsi256_si128(block)); + _mm_storeu_si128((void*)pCounter, _mm256_castsi256_si128(pCounter256)); + resultHash = _mm_shuffle_epi8(resultHash, shuff_mask_128); + _mm_storeu_si128((void*)(AESGCM_GHASH(pState)), resultHash); + // HKeys zeroizing + for (int i = 0; i < 8; i++) + _mm256_storeu_si256((HashKey+i), zero_256); + tmpKey = _mm256_setzero_si256(); + } // if (len < 256) +} #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ diff --git a/sources/ippcp/pcpaes_ccmstart.c b/sources/ippcp/pcpaes_ccmstart.c index 9eebde71..bde0f5dd 100644 --- a/sources/ippcp/pcpaes_ccmstart.c +++ b/sources/ippcp/pcpaes_ccmstart.c @@ -16,7 +16,7 @@ /* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. -// +// // Context: // ippsAES_CCMStart() // @@ -34,7 +34,7 @@ /*F* // Name: ippsAES_CCMStart // -// Purpose: Start the process (encryption+generation) or (decryption+veryfication). +// Purpose: Start the process (encryption+generation) or (decryption+verification). 
// // Returns: Reason: // ippStsNullPtrErr pState == NULL diff --git a/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c b/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c index c3e94a50..a29a2528 100644 --- a/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c +++ b/sources/ippcp/pcpaes_cfbdecrypt_vaes512.c @@ -37,14 +37,14 @@ #pragma warning(disable: 4310) // zmmintrin.h bug: truncation of constant value #endif -__INLINE Ipp64u broadcast_16to64(Ipp16u mask16) +__IPPCP_INLINE Ipp64u broadcast_16to64(Ipp16u mask16) { Ipp64u mask64 = (Ipp64u)mask16; mask64 = (mask64 << 48) | (mask64 << 32) | (mask64 << 16) | mask64; return mask64; } -__INLINE __m512i getInputBlocks(__m128i * const currentState, const __m512i * const pCipherBlocks, __mmask16 blocksCompressMask) +__IPPCP_INLINE __m512i getInputBlocks(__m128i * const currentState, const __m512i * const pCipherBlocks, __mmask16 blocksCompressMask) { // extract 128-bit cipher blocks __m128i c0 = _mm512_extracti64x2_epi64(*pCipherBlocks, 0); diff --git a/sources/ippcp/pcpaes_cmac_stuff.h b/sources/ippcp/pcpaes_cmac_stuff.h index 13f26813..a2eaba70 100644 --- a/sources/ippcp/pcpaes_cmac_stuff.h +++ b/sources/ippcp/pcpaes_cmac_stuff.h @@ -15,11 +15,11 @@ *************************************************************************/ /* -// +// // Purpose: // Cryptography Primitive. // AES-CMAC Functions -// +// // Contents: // init() // @@ -34,7 +34,7 @@ #if !defined(_PCP_AES_CMAC_STUFF_H_) #define _PCP_AES_CMAC_STUFF_H_ -__INLINE int cpSizeofCtx_AESCMAC(void) +__IPPCP_INLINE int cpSizeofCtx_AESCMAC(void) { return sizeof(IppsAES_CMACState); } diff --git a/sources/ippcp/pcpaes_cmacupdate.c b/sources/ippcp/pcpaes_cmacupdate.c index b21266c6..d18e2252 100644 --- a/sources/ippcp/pcpaes_cmacupdate.c +++ b/sources/ippcp/pcpaes_cmacupdate.c @@ -15,11 +15,11 @@ *************************************************************************/ /* -// +// // Purpose: // Cryptography Primitive. 
// AES-CMAC Functions -// +// // Contents: // ippsAES_CMACUpdate() // @@ -181,12 +181,12 @@ IPPFUN(IppStatus, ippsAES_CMACUpdate,(const Ipp8u* pSrc, int len, IppsAES_CMACSt } /* - // remaind + // remainder */ if(len) { /* workaround to avoid false positive stringop-overflow error on gcc10.1 and gcc11.1 */ len = ( IPP_MIN(len, MBS_RIJ128) ); - + CopyBlock(pSrc, (Ipp8u*)(&CMAC_BUFF(pState)), len); /* update internal buffer filling */ CMAC_INDX(pState) += len; diff --git a/sources/ippcp/pcpaes_ctr_process.h b/sources/ippcp/pcpaes_ctr_process.h index 445fda11..28f44ee1 100644 --- a/sources/ippcp/pcpaes_ctr_process.h +++ b/sources/ippcp/pcpaes_ctr_process.h @@ -61,7 +61,7 @@ // counter will updated on return // */ -__INLINE void MaskCounter128(Ipp8u* pMaskIV, int ctrBtSize) +__IPPCP_INLINE void MaskCounter128(Ipp8u* pMaskIV, int ctrBtSize) { /* construct ctr mask */ int maskPosition = (MBS_RIJ128*8-ctrBtSize)/8; @@ -99,15 +99,15 @@ IppStatus cpProcessAES_ctr(const Ipp8u* pSrc, Ipp8u* pDst, int dataLen, if(ctrNumBitSize < (8 * (int)sizeof(int) - 5)) { /* - // dataLen is int, and it is always positive - // data blocks number compute from dataLen - // by dividing it to MBS_RIJ128 = 16 - // and additing 1 if dataLen % 16 != 0 - // so if ctrNumBitSize >= 8 * sizeof(int) - 5 - // function can process data with any possible - // passed dataLen without counter overflow + // dataLen is int, and it is always positive + // data blocks number compute from dataLen + // by dividing it to MBS_RIJ128 = 16 + // and adding 1 if dataLen % 16 != 0 + // so if ctrNumBitSize >= 8 * sizeof(int) - 5 + // function can process data with any possible + // passed dataLen without counter overflow */ - + int dataBlocksNum = dataLen >> 4; if(dataLen & 15){ dataBlocksNum++; diff --git a/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c b/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c index 7dd00820..eb60083f 100644 --- a/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c +++ 
b/sources/ippcp/pcpaes_ctrencrypt_rij128pipe_vaes512.c @@ -54,7 +54,7 @@ static __ALIGN64 Ipp64u nextIncLoMask[] = { 0x0, 0x4, 0x0, 0x4, 0x0, 0x4, 0x0, static __ALIGN64 Ipp64u incLoByOneMask[] = { 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1 }; static __ALIGN64 Ipp64u incHiByOneMask[] = { 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0 }; -__INLINE __m512i adcLo_epi64(__m512i a, __m512i b) +__IPPCP_INLINE __m512i adcLo_epi64(__m512i a, __m512i b) { a = _mm512_add_epi64(a, b); // check overflow in each low 64-bit of 128-bit numbers @@ -65,7 +65,7 @@ __INLINE __m512i adcLo_epi64(__m512i a, __m512i b) return a; } -__INLINE __m512i applyNonce(__m512i a, __m512i ctrBitMask, __m512i templateCtr) +__IPPCP_INLINE __m512i applyNonce(__m512i a, __m512i ctrBitMask, __m512i templateCtr) { a = _mm512_shuffle_epi8(a, M512(swapBytes)); a = _mm512_and_epi64(a, ctrBitMask); diff --git a/sources/ippcp/pcpaes_gcm_internal_func.c b/sources/ippcp/pcpaes_gcm_internal_func.c new file mode 100644 index 00000000..43ee84eb --- /dev/null +++ b/sources/ippcp/pcpaes_gcm_internal_func.c @@ -0,0 +1,257 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* +// +// Purpose: +// Cryptography Primitive. 
+// * Initialization functions for internal methods and pointers inside AES-GCM context; +// * AES-GCM encryption kernels with the conditional noise injections mechanism; +// +*/ + +#include "pcpaes_gcm_internal_func.h" +#include "aes_gcm_avx512.h" +#include "owncp.h" +#include "pcpaesm.h" +#include "pcptool.h" + +#if (_IPP32E >= _IPP32E_K0) +#include "pcpaesauthgcm_avx512.h" +#else +#include "pcpaesauthgcm.h" +#endif /* #if(_IPP32E>=_IPP32E_K0) */ + +/* + * This function dispatches to the right internal methods and sets pointers to them inside the AES-GCM state. + */ +IPP_OWN_DEFN(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pState, Ipp64u keyByteLen)) +{ +#if (_IPP32E >= _IPP32E_K0) + if (IsFeatureEnabled(ippCPUID_AVX512VAES) && IsFeatureEnabled(ippCPUID_AVX512VCLMUL)) { + switch (keyByteLen) { + case 16: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_vaes_avx512; + break; + case 24: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_vaes_avx512; + break; + case 32: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_vaes_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_vaes_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_vaes_avx512; + break; + } + + AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_vaes512; + AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_vaes512; + AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_vaes512; + AES_GCM_GMUL(pState) = aes_gcm_gmult_vaes512; + } else { + switch (keyByteLen) { + case 16: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_avx512; + break; + case 24: + 
AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_avx512; + break; + case 32: + AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_avx512; + AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_avx512; + AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_avx512; + break; + } + + AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_avx512; + AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_avx512; + AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_avx512; + AES_GCM_GMUL(pState) = aes_gcm_gmult_avx512; + } +#else + IPP_UNREFERENCED_PARAMETER(keyByteLen); + + /* set up: + // - ghash function + // - authentication function + */ + AESGCM_HASH(pState) = AesGcmMulGcm_table2K_ct; // AesGcmMulGcm_table2K; + AESGCM_AUTH(pState) = AesGcmAuth_table2K_ct; // AesGcmAuth_table2K; + AESGCM_ENC(pState) = wrpAesGcmEnc_table2K; + AESGCM_DEC(pState) = wrpAesGcmDec_table2K; + +#if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) +// the dead code that currently is unused +//#if (_IPP32E >= _IPP32E_K0) +// if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { +// AESGCM_HASH(pState) = AesGcmMulGcm_vaes; +// AESGCM_AUTH(pState) = AesGcmAuth_vaes; +// AESGCM_ENC(pState) = AesGcmEnc_vaes; +// AESGCM_DEC(pState) = AesGcmDec_vaes; +// } else +//#endif /* #if(_IPP32E>=_IPP32E_K0) */ + if (IsFeatureEnabled(ippCPUID_AES | ippCPUID_CLMUL)) { + AESGCM_HASH(pState) = AesGcmMulGcm_avx; + AESGCM_AUTH(pState) = AesGcmAuth_avx; + AESGCM_ENC(pState) = wrpAesGcmEnc_avx; + AESGCM_DEC(pState) = wrpAesGcmDec_avx; + } +#if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) + if (IsFeatureEnabled(ippCPUID_AVX2VAES | ippCPUID_AVX2VCLMUL)) { + AESGCM_HASH(pState) = AesGcmMulGcm_avx; + AESGCM_AUTH(pState) = AesGcmAuth_avx; + AESGCM_ENC(pState) = AesGcmEnc_vaes_avx2; + AESGCM_DEC(pState) = AesGcmDec_vaes_avx2; + } +#endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ +#endif /* #if(_IPP>=_IPP_P8) || 
(_IPP32E>=_IPP32E_Y8) */ + +#endif /* #if(_IPP32E>=_IPP32E_K0) */ +} + +/*! + * This function computes AES-GCM encryption kernel with the conditional noise injections mechanism (Mistletoe3 + * attack mitigation). + * + * Parameters: + * \param[in] pSrc Pointer to plaintext. + * \param[in] pDst Pointer to ciphertext. + * \param[in] ptxt_len Length of the plaintext in bytes. + * \param[in] pState Pointer to the AES-GCM context. + */ +IPP_OWN_DEFN(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, + IppsAES_GCMState* pState)) +{ +/* Identify the encryption method. It's different for different platforms */ +#if(_IPP32E>=_IPP32E_K0) + EncryptUpdate_ encFunc = AES_GCM_ENCRYPT_UPDATE(pState); +#else + Encrypt_ encFunc = AESGCM_ENC(pState); +#endif + +#if (_AES_PROB_NOISE == _FEATURE_ON_) + /* Mistletoe3 mitigation */ + cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); + if (AES_NOISE_LEVEL(params) > 0) { + /* Number of bytes allowed for operation without adding noise */ + int chunk_size; + /* Number of bytes remaining for operation */ + int remaining_size = ptxt_len; + + while (remaining_size > 0) { + /* How many bytes to encrypt in this operation */ + chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? 
+ MISTLETOE3_MAX_CHUNK_SIZE : + remaining_size; + + #if(_IPP32E>=_IPP32E_K0) + encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)chunk_size); + #else + encFunc(pDst, pSrc, chunk_size, pState); + #endif + + cpAESRandomNoise(NULL, + MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), + MISTLETOE3_NOISE_RATE, + &AES_NOISE_RAND(params)); + + pSrc += chunk_size; + pDst += chunk_size; + remaining_size -= chunk_size; + } + } else +#endif + { /* Process without noise injection */ + #if(_IPP32E>=_IPP32E_K0) + encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)ptxt_len); + #else + encFunc(pDst, pSrc, ptxt_len, pState); + #endif + } +} + + +/*! + * This function computes AES-GCM decryption kernel with the conditional noise injections mechanism (Mistletoe3 + * attack mitigation). + * + * Parameters: + * \param[in] pSrc Pointer to ciphertext. + * \param[in] pDst Pointer to deciphered text. + * \param[in] ctxt_len Length of the ciphertext in bytes. + * \param[in] pState Pointer to the AES-GCM context. + */ +IPP_OWN_DEFN(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ctxt_len, + IppsAES_GCMState* pState)) +{ +/* Identify the decryption method. It's different for different platforms */ +#if(_IPP32E>=_IPP32E_K0) + DecryptUpdate_ decFunc = AES_GCM_DECRYPT_UPDATE(pState); +#else + Decrypt_ decFunc = AESGCM_DEC(pState); +#endif + +#if (_AES_PROB_NOISE == _FEATURE_ON_) + /* Mistletoe3 mitigation */ + cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); + if (AES_NOISE_LEVEL(params) > 0) { + /* Number of bytes allowed for operation without adding noise */ + int chunk_size; + /* Number of bytes remaining for operation */ + int remaining_size = ctxt_len; + + while (remaining_size > 0) { + /* How many bytes to decrypt in this operation */ + chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? 
+ MISTLETOE3_MAX_CHUNK_SIZE : + remaining_size; + + #if(_IPP32E>=_IPP32E_K0) + decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)chunk_size); + #else + decFunc(pDst, pSrc, chunk_size, pState); + #endif + + cpAESRandomNoise(NULL, + MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), + MISTLETOE3_NOISE_RATE, + &AES_NOISE_RAND(params)); + + pSrc += chunk_size; + pDst += chunk_size; + remaining_size -= chunk_size; + } + } else +#endif + { /* Process without noise injection */ + #if(_IPP32E>=_IPP32E_K0) + decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), + pDst, pSrc, (Ipp64u)ctxt_len); + #else + decFunc(pDst, pSrc, ctxt_len, pState); + #endif + } +} diff --git a/sources/ippcp/pcpaes_gcm_internal_func.h b/sources/ippcp/pcpaes_gcm_internal_func.h new file mode 100644 index 00000000..2bd5c627 --- /dev/null +++ b/sources/ippcp/pcpaes_gcm_internal_func.h @@ -0,0 +1,41 @@ +/************************************************************************* +* Copyright (C) 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*************************************************************************/ + +/* +// +// Purpose: +// Cryptography Primitive. 
+// Initialization functions for internal methods and pointers inside AES-GCM context; +// AES-GCM encryption kernels with the conditional noise injections mechanism; +// +*/ + +#if !defined(_PCP_AES_GCM_INTERNAL_FUNC_H) +#define _PCP_AES_GCM_INTERNAL_FUNC_H + +#include "owndefs.h" +#include "pcpaes_internal_func.h" + +#define cpAesGCM_setup_ptrs_and_methods OWNAPI(cpAesGCM_setup_ptrs_and_methods) +IPP_OWN_DECL(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pCtx, Ipp64u keyByteLen)) + +#define condNoisedGCMEncryption OWNAPI(condNoisedGCMEncryption) +IPP_OWN_DECL(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) + +#define condNoisedGCMDecryption OWNAPI(condNoisedGCMDecryption) +IPP_OWN_DECL(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) + +#endif /* _PCP_AES_GCM_INTERNAL_FUNC_H */ diff --git a/sources/ippcp/pcpaes_gcm_vaes512.h b/sources/ippcp/pcpaes_gcm_vaes512.h index dc98428c..de551d98 100644 --- a/sources/ippcp/pcpaes_gcm_vaes512.h +++ b/sources/ippcp/pcpaes_gcm_vaes512.h @@ -59,7 +59,7 @@ static __ALIGN64 Ipp8u swapBytes[] = { * * NB: make sure unused parts of input registers are zeroed to avoid issues with further horizontal XOR. 
*/ -__INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A2 A1 A0 */ +__IPPCP_INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A2 A1 A0 */ const __m512i * const pHKeys, /* B3 B2 B1 B0 */ const __m512i * const pHKeysKaratsuba, /* precomputed (b1i^b0i) */ __m512i * const pH, @@ -77,7 +77,7 @@ __INLINE void AesGcmKaratsubaMul4(const __m512i * const pA, /* A3 A /* The function performs horizontal XOR for 4 128-bit values in 512-bit register 128-bit result value saved in the low part of the 512-bit register */ -__INLINE void HXor4x128(const __m512i * const zmm, +__IPPCP_INLINE void HXor4x128(const __m512i * const zmm, __m128i * const xmm) { __m256i ymm; @@ -92,7 +92,7 @@ __INLINE void HXor4x128(const __m512i * const zmm, /* The function performs Montgomery reduction of 256-bit polynomial to 128-bit one with irreducible polynomial */ -__INLINE void ReducePoly2x128(const __m128i * const pHI, +__IPPCP_INLINE void ReducePoly2x128(const __m128i * const pHI, const __m128i * const pLO, __m128i * const result) { @@ -114,7 +114,7 @@ __INLINE void ReducePoly2x128(const __m128i * const pHI, } /* The function aggregates partial products of Karatsuba multiplication into final ghash value */ -__INLINE void AggregateKaratsubaPartialProducts(const __m512i * const pH, +__IPPCP_INLINE void AggregateKaratsubaPartialProducts(const __m512i * const pH, const __m512i * const pM, const __m512i * const pL, __m128i * const result) diff --git a/sources/ippcp/pcpaes_gcmdecrypt.c b/sources/ippcp/pcpaes_gcmdecrypt.c index 57f05922..595778b4 100644 --- a/sources/ippcp/pcpaes_gcmdecrypt.c +++ b/sources/ippcp/pcpaes_gcmdecrypt.c @@ -29,7 +29,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" diff --git a/sources/ippcp/pcpaes_gcmencrypt.c b/sources/ippcp/pcpaes_gcmencrypt.c index 626e3df6..c37b6799 100644 --- 
a/sources/ippcp/pcpaes_gcmencrypt.c +++ b/sources/ippcp/pcpaes_gcmencrypt.c @@ -29,7 +29,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" diff --git a/sources/ippcp/pcpaes_gcminit.c b/sources/ippcp/pcpaes_gcminit.c index 586b6236..fd41a444 100644 --- a/sources/ippcp/pcpaes_gcminit.c +++ b/sources/ippcp/pcpaes_gcminit.c @@ -30,7 +30,7 @@ #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #if (_ALG_AES_SAFE_==_ALG_AES_SAFE_COMPACT_SBOX_) # include "pcprijtables.h" @@ -89,7 +89,7 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta Ipp8u zeroKey[32] = {0}; const Ipp8u* pActualKey = pKey? pKey : zeroKey; - + #if (_AES_PROB_NOISE == _FEATURE_ON_) /* Reset AES noise parameters */ cpAESNoiseParams *params = (cpAESNoiseParams *)&AESGCM_NOISE_PARAMS(pState); @@ -132,7 +132,7 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta } #else - + /* init cipher */ { IppStatus sts = ippsAESInit(pKey, keyLen, AESGCM_CIPHER(pState), cpSizeofCtx_AES()); @@ -155,24 +155,24 @@ IPPFUN(IppStatus, ippsAES_GCMInit,(const Ipp8u* pKey, int keyLen, IppsAES_GCMSta #endif } + #if (_IPP >=_IPP_H9) || (_IPP32E>=_IPP32E_L9) + if (IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) { + AesGcmPrecompute_avx2_vaes(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); + } + else + #endif /* #if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ + #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) - // the dead code that currently is unused - //#if(_IPP32E>=_IPP32E_K0) - //if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { - // /* pre-compute hKey<<1, (hKey<<1)^2, (hKey<<1)^3, ... 
, (hKey<<1)^15 and corresponding - // Karatsuba constant multipliers for aggregated reduction */ - // AesGcmPrecompute_vaes(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); - //} - //else - //#endif /* #if(_IPP32E>=_IPP32E_K0) */ - if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL) || IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) { - /* pre-compute reflect(hkey) and hKey<<1, (hKey<<1)^2 and (hKey<<1)^4 powers of hKey */ - AesGcmPrecompute_avx(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); - } - else - #endif - AesGcmPrecompute_table2K(AES_GCM_MTBL(pState), AESGCM_HKEY(pState)); - #endif /* #if(_IPP32E>=_IPP32E_K0) */ + if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL)) { + /* pre-compute reflect(hkey) and hKey<<1, (hKey<<1)^2 and (hKey<<1)^4 powers of hKey */ + AesGcmPrecompute_avx(AESGCM_CPWR(pState), AESGCM_HKEY(pState)); + } + else + #endif /* #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) */ + AesGcmPrecompute_table2K(AES_GCM_MTBL(pState), AESGCM_HKEY(pState)); + +#endif /* #if(_IPP32E>=_IPP32E_K0) */ + return ippStsNoErr; } diff --git a/sources/ippcp/pcpaes_gcmmul_vaes512.c b/sources/ippcp/pcpaes_gcmmul_vaes512.c index 4953eccc..76b30a74 100644 --- a/sources/ippcp/pcpaes_gcmmul_vaes512.c +++ b/sources/ippcp/pcpaes_gcmmul_vaes512.c @@ -45,7 +45,7 @@ polynomial reduction. 2 polynomials can be processed at one call. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash2(const __m256i* const src1, +__IPPCP_INLINE void AesGcmGhash2(const __m256i* const src1, const __m256i* const src2, __m256i * const result) { @@ -101,7 +101,7 @@ __INLINE void AesGcmGhash2(const __m256i* const src1, polynomial reduction. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash(const __m128i* const a, +__IPPCP_INLINE void AesGcmGhash(const __m128i* const a, const __m128i* const b, __m128i * const result) { @@ -119,7 +119,7 @@ __INLINE void AesGcmGhash(const __m128i* const a, polynomial reduction. 
4 polynomials can be processed at one call. The inputs are bit-reflected. The result is bit-reflected. */ -__INLINE void AesGcmGhash4(const __m512i* const src1, +__IPPCP_INLINE void AesGcmGhash4(const __m512i* const src1, const __m512i* const src2, __m512i * const result) { diff --git a/sources/ippcp/pcpaes_gcmreinit.c b/sources/ippcp/pcpaes_gcmreinit.c index f171a996..069b698b 100644 --- a/sources/ippcp/pcpaes_gcmreinit.c +++ b/sources/ippcp/pcpaes_gcmreinit.c @@ -28,7 +28,7 @@ #include "owndefs.h" #include "owncp.h" #include "pcpaesm.h" -#include "pcpaes_internal_func.h" +#include "pcpaes_gcm_internal_func.h" #include "pcptool.h" #if (_IPP32E >= _IPP32E_K0) diff --git a/sources/ippcp/pcpaes_internal_func.c b/sources/ippcp/pcpaes_internal_func.c index 2624bd9c..b9b47d7b 100644 --- a/sources/ippcp/pcpaes_internal_func.c +++ b/sources/ippcp/pcpaes_internal_func.c @@ -18,25 +18,15 @@ // // Purpose: // Cryptography Primitive. -// * Initialization functions for internal methods and pointers inside -// AES cipher context and AES-GCM context; -// * AES-GCM encryption kernels with the conditional noise injections mechanism; +// * Initialization functions for internal methods and pointers inside AES cipher context // */ #include "pcpaes_internal_func.h" -#include "aes_gcm_avx512.h" -#include "owndefs.h" #include "owncp.h" #include "pcpaesm.h" #include "pcptool.h" -#if (_IPP32E >= _IPP32E_K0) -#include "pcpaesauthgcm_avx512.h" -#else -#include "pcpaesauthgcm.h" -#endif /* #if(_IPP32E>=_IPP32E_K0) */ - /* * This function set up pointers to encryption and decryption key schedules, * dispatches to the right internal methods and sets pointers to them inside the AES state. @@ -75,226 +65,3 @@ IPP_OWN_DEFN(void, cpAes_setup_ptrs_and_methods, (IppsAESSpec * pCtx)) } #endif } - -/* - * This function dispatches to the right internal methods and sets pointers to them inside the AES-GCM state. 
- */ -IPP_OWN_DEFN(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pState, Ipp64u keyByteLen)) -{ -#if (_IPP32E >= _IPP32E_K0) - if (IsFeatureEnabled(ippCPUID_AVX512VAES) && IsFeatureEnabled(ippCPUID_AVX512VCLMUL)) { - switch (keyByteLen) { - case 16: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_vaes_avx512; - break; - case 24: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_vaes_avx512; - break; - case 32: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_vaes_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_vaes_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_vaes_avx512; - break; - } - - AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_vaes512; - AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_vaes512; - AES_GCM_AAD_UPDATE(pState) = aes_gcm_aad_hash_update_vaes512; - AES_GCM_GMUL(pState) = aes_gcm_gmult_vaes512; - } else { - switch (keyByteLen) { - case 16: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_128_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_128_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_128_avx512; - break; - case 24: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_192_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_192_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_192_avx512; - break; - case 32: - AES_GCM_ENCRYPT_UPDATE(pState) = aes_gcm_enc_256_update_avx512; - AES_GCM_DECRYPT_UPDATE(pState) = aes_gcm_dec_256_update_avx512; - AES_GCM_GET_TAG(pState) = aes_gcm_gettag_256_avx512; - break; - } - - AES_GCM_IV_UPDATE(pState) = aes_gcm_iv_hash_update_avx512; - AES_GCM_IV_FINALIZE(pState) = aes_gcm_iv_hash_finalize_avx512; - AES_GCM_AAD_UPDATE(pState) = 
aes_gcm_aad_hash_update_avx512; - AES_GCM_GMUL(pState) = aes_gcm_gmult_avx512; - } -#else - IPP_UNREFERENCED_PARAMETER(keyByteLen); - - /* set up: - // - ghash function - // - authentication function - */ - AESGCM_HASH(pState) = AesGcmMulGcm_table2K_ct; // AesGcmMulGcm_table2K; - AESGCM_AUTH(pState) = AesGcmAuth_table2K_ct; // AesGcmAuth_table2K; - AESGCM_ENC(pState) = wrpAesGcmEnc_table2K; - AESGCM_DEC(pState) = wrpAesGcmDec_table2K; - -#if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) -// the dead code that currently is unused -//#if (_IPP32E >= _IPP32E_K0) -// if (IsFeatureEnabled(ippCPUID_AVX512VAES)) { -// AESGCM_HASH(pState) = AesGcmMulGcm_vaes; -// AESGCM_AUTH(pState) = AesGcmAuth_vaes; -// AESGCM_ENC(pState) = AesGcmEnc_vaes; -// AESGCM_DEC(pState) = AesGcmDec_vaes; -// } else -//#endif /* #if(_IPP32E>=_IPP32E_K0) */ - if (IsFeatureEnabled(ippCPUID_AES | ippCPUID_CLMUL)) { - AESGCM_HASH(pState) = AesGcmMulGcm_avx; - AESGCM_AUTH(pState) = AesGcmAuth_avx; - AESGCM_ENC(pState) = wrpAesGcmEnc_avx; - AESGCM_DEC(pState) = wrpAesGcmDec_avx; - } -#if (_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) - if (IsFeatureEnabled(ippCPUID_AVX2VAES | ippCPUID_AVX2VCLMUL)) { - AESGCM_HASH(pState) = AesGcmMulGcm_avx; - AESGCM_AUTH(pState) = AesGcmAuth_avx; - AESGCM_ENC(pState) = AesGcmEnc_vaes_avx2; - AESGCM_DEC(pState) = AesGcmDec_vaes_avx2; - } -#endif /* #if(_IPP==_IPP_H9) || (_IPP32E==_IPP32E_L9) */ -#endif /* #if(_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) */ - -#endif /* #if(_IPP32E>=_IPP32E_K0) */ -} - - - -/*! - * This function computes AES-GCM encryption kernel with the the conditional noise injections mechanism (Mistletoe3 - * attack mitigation). - * - * Parameters: - * \param[in] pSrc Pointer to plaintext. - * \param[in] pDst Pointer to ciphertext. - * \param[in] ptxt_len Length of the plaintext in bytes. - * \param[in] pState Pointer to the AES-GCM context. 
- */ -IPP_OWN_DEFN(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, - IppsAES_GCMState* pState)) -{ -/* Identify the encryption method. It's different for different platforms */ -#if(_IPP32E>=_IPP32E_K0) - EncryptUpdate_ encFunc = AES_GCM_ENCRYPT_UPDATE(pState); -#else - Encrypt_ encFunc = AESGCM_ENC(pState); -#endif - -#if (_AES_PROB_NOISE == _FEATURE_ON_) - /* Mistletoe3 mitigation */ - cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); - if (AES_NOISE_LEVEL(params) > 0) { - /* Number of bytes allowed for operation without adding noise */ - int chunk_size; - /* Number of bytes remaining for operation */ - int remaining_size = ptxt_len; - - while (remaining_size > 0) { - /* How many bytes to encrypt in this operation */ - chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? - MISTLETOE3_MAX_CHUNK_SIZE : - remaining_size; - - #if(_IPP32E>=_IPP32E_K0) - encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)chunk_size); - #else - encFunc(pDst, pSrc, chunk_size, pState); - #endif - - cpAESRandomNoise(NULL, - MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), - MISTLETOE3_NOISE_RATE, - &AES_NOISE_RAND(params)); - - pSrc += chunk_size; - pDst += chunk_size; - remaining_size -= chunk_size; - } - } else -#endif - { /* Process without noise injection */ - #if(_IPP32E>=_IPP32E_K0) - encFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)ptxt_len); - #else - encFunc(pDst, pSrc, ptxt_len, pState); - #endif - } -} - - -/*! - * This function computes AES-GCM decryption kernel with the the conditional noise injections mechanism (Mistletoe3 - * attack mitigation). - * - * Parameters: - * \param[in] pSrc Pointer to ciphertext. - * \param[in] pDst Pointer to deciphered text. - * \param[in] ctxt_len Length of the ciphertext in bytes. - * \param[in] pState Pointer to the AES-GCM context. 
- */ -IPP_OWN_DEFN(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ctxt_len, - IppsAES_GCMState* pState)) -{ -/* Identify the decryption method. It's different for different platforms */ -#if(_IPP32E>=_IPP32E_K0) - DecryptUpdate_ decFunc = AES_GCM_DECRYPT_UPDATE(pState); -#else - Decrypt_ decFunc = AESGCM_DEC(pState); -#endif - -#if (_AES_PROB_NOISE == _FEATURE_ON_) - /* Mistletoe3 mitigation */ - cpAESNoiseParams *params = (cpAESNoiseParams*)&AESGCM_NOISE_PARAMS(pState); - if (AES_NOISE_LEVEL(params) > 0) { - /* Number of bytes allowed for operation without adding noise */ - int chunk_size; - /* Number of bytes remaining for operation */ - int remaining_size = ctxt_len; - - while (remaining_size > 0) { - /* How many bytes to decrypt in this operation */ - chunk_size = (remaining_size >= MISTLETOE3_MAX_CHUNK_SIZE) ? - MISTLETOE3_MAX_CHUNK_SIZE : - remaining_size; - - #if(_IPP32E>=_IPP32E_K0) - decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)chunk_size); - #else - decFunc(pDst, pSrc, chunk_size, pState); - #endif - - cpAESRandomNoise(NULL, - MISTLETOE3_BASE_NOISE_LEVEL + AES_NOISE_LEVEL(params), - MISTLETOE3_NOISE_RATE, - &AES_NOISE_RAND(params)); - - pSrc += chunk_size; - pDst += chunk_size; - remaining_size -= chunk_size; - } - } else -#endif - { /* Process without noise injection */ - #if(_IPP32E>=_IPP32E_K0) - decFunc(&AES_GCM_KEY_DATA(pState), &AES_GCM_CONTEXT_DATA(pState), - pDst, pSrc, (Ipp64u)ctxt_len); - #else - decFunc(pDst, pSrc, ctxt_len, pState); - #endif - } -} diff --git a/sources/ippcp/pcpaes_internal_func.h b/sources/ippcp/pcpaes_internal_func.h index d0082401..fe10de80 100644 --- a/sources/ippcp/pcpaes_internal_func.h +++ b/sources/ippcp/pcpaes_internal_func.h @@ -19,8 +19,6 @@ // Purpose: // Cryptography Primitive. 
// Initialization functions for internal methods and pointers inside AES cipher context -// and AES-GCM context; -// AES-GCM encryption kernels with the conditional noise injections mechanism; // */ @@ -32,13 +30,4 @@ #define cpAes_setup_ptrs_and_methods OWNAPI(cpAes_setup_ptrs_and_methods) IPP_OWN_DECL(void, cpAes_setup_ptrs_and_methods, (IppsAESSpec * pCtx)) -#define cpAesGCM_setup_ptrs_and_methods OWNAPI(cpAesGCM_setup_ptrs_and_methods) -IPP_OWN_DECL(void, cpAesGCM_setup_ptrs_and_methods, (IppsAES_GCMState * pCtx, Ipp64u keyByteLen)) - -#define condNoisedGCMEncryption OWNAPI(condNoisedGCMEncryption) -IPP_OWN_DECL(void, condNoisedGCMEncryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) - -#define condNoisedGCMDecryption OWNAPI(condNoisedGCMDecryption) -IPP_OWN_DECL(void, condNoisedGCMDecryption, (const Ipp8u* pSrc, Ipp8u* pDst, int ptxt_len, IppsAES_GCMState* pState)) - #endif /* _PCP_AES_INTERNAL_FUNC_H */ diff --git a/sources/ippcp/pcpaes_sivstuff.h b/sources/ippcp/pcpaes_sivstuff.h index 76108c4a..83b56927 100644 --- a/sources/ippcp/pcpaes_sivstuff.h +++ b/sources/ippcp/pcpaes_sivstuff.h @@ -14,12 +14,12 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// AES-SIV Functions (RFC 5297) -// +// // Contents: // Stuff() // @@ -35,12 +35,12 @@ #define _PCP_AES_SIV_STUFF_H_ //////////////////////////////////////////////////////////// -__INLINE void cpAES_CMAC(Ipp8u mac[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) +__IPPCP_INLINE void cpAES_CMAC(Ipp8u mac[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) { ippsAES_CMACUpdate(pSrc, len, pCtx); ippsAES_CMACFinal(mac, MBS_RIJ128, pCtx); } -__INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int keyLen, IppsAES_CMACState* pCtx, int ctxSize) +__IPPCP_INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int keyLen, IppsAES_CMACState* pCtx, int ctxSize) { IppStatus sts = ippsAES_CMACInit(pKey, keyLen, pCtx, ctxSize); if(ippStsNoErr==sts) { @@ -49,7 +49,7 @@ __INLINE IppStatus cpAES_S2V_init(Ipp8u v[MBS_RIJ128], const Ipp8u* pKey, int ke } return sts; } -__INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) +__IPPCP_INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) { /* double inp */ Ipp32u carry = 0; @@ -63,7 +63,7 @@ __INLINE Ipp8u* double16(Ipp8u out[MBS_RIJ128], const Ipp8u inp[MBS_RIJ128]) out[MBS_RIJ128-1] ^= ((Ipp8u)(0-carry) & 0x87); return out; } -__INLINE void cpAES_S2V_update(Ipp8u v[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) +__IPPCP_INLINE void cpAES_S2V_update(Ipp8u v[MBS_RIJ128], const Ipp8u* pSrc, int len, IppsAES_CMACState* pCtx) { Ipp8u t[MBS_RIJ128]; cpAES_CMAC(t, pSrc, len, pCtx); diff --git a/sources/ippcp/pcpaes_xts_vaes512.c b/sources/ippcp/pcpaes_xts_vaes512.c index 201f1527..5f9009d9 100644 --- a/sources/ippcp/pcpaes_xts_vaes512.c +++ b/sources/ippcp/pcpaes_xts_vaes512.c @@ -42,7 +42,7 @@ #define M512(mem) (*((__m512i*)(mem))) /* Generate next 4 tweaks with 2^8 multiplier */ -__INLINE __m512i nextTweaks_x8(__m512i tweak128x4) +__IPPCP_INLINE __m512i nextTweaks_x8(__m512i tweak128x4) { const __m512i poly = 
_mm512_set_epi64(0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87); @@ -55,7 +55,7 @@ __INLINE __m512i nextTweaks_x8(__m512i tweak128x4) } /* Generate next 4 tweaks with 2^32 multiplier */ -__INLINE __m512i nextTweaks_x32(__m512i tweak128x4) +__IPPCP_INLINE __m512i nextTweaks_x32(__m512i tweak128x4) { const __m512i poly = _mm512_set_epi64(0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87); diff --git a/sources/ippcp/pcpaesauthgcm.h b/sources/ippcp/pcpaesauthgcm.h index c6c5eadc..a7aeca56 100644 --- a/sources/ippcp/pcpaesauthgcm.h +++ b/sources/ippcp/pcpaesauthgcm.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Message Authentication Algorithm // Internal Definitions and Internal Functions Prototypes -// -// +// +// */ #if !defined(_CP_AESAUTH_GCM_H) @@ -54,7 +54,7 @@ typedef enum { } GcmState; struct _cpAES_GCM { - + Ipp32u idCtx; /* AES-GCM id */ GcmState state; /* GCM state: Init, IV|AAD|TXT processing */ Ipp64u ivLen; /* IV length (bytes) */ @@ -77,13 +77,19 @@ struct _cpAES_GCM { IppsAESSpec cipher; #if (_AES_PROB_NOISE == _FEATURE_ON_) - __ALIGN16 + __ALIGN16 cpAESNoiseParams noiseParams; #endif __ALIGN16 /* aligned pre-computed data: */ Ipp8u multiplier[BLOCK_SIZE]; /* - (default) hKey */ /* - (aes_ni) hKey*t, (hKey*t)^2, (hKey*t)^4 */ + /* - (avx2_vaes) 16 vectors by 128-bit values + hKey<<1, hKey^2<<1, hKey^3<<1, hKey^4<<1, + hKey^5<<1, hKey^6<<1, hKey^7<<1, hKey^8<<1, + hKey^9<<1, hKey^10<<1, hKey^11<<1, hKey^12<<1, + hKey^13<<1, hKey^14<<1, hKey^15<<1, hKey^16<<1, + */ /* - (vaes_ni) 8 reverted ordered vectors by 4 128-bit values. 
hKeys derivations in the multiplier[] array in order of appearance (zero-index starts from the left): @@ -101,9 +107,10 @@ struct _cpAES_GCM { /* alignment */ #define AESGCM_ALIGNMENT (16) -#define PRECOMP_DATA_SIZE_AES_NI_AESGCM (BLOCK_SIZE*4) -#define PRECOMP_DATA_SIZE_VAES_NI_AESGCM (BLOCK_SIZE*16*2) -#define PRECOMP_DATA_SIZE_FAST2K (BLOCK_SIZE*128) +#define PRECOMP_DATA_SIZE_AES_NI_AESGCM (BLOCK_SIZE*4) +#define PRECOMP_DATA_SIZE_AVX2_VAES_AESGCM (BLOCK_SIZE*16) +#define PRECOMP_DATA_SIZE_VAES_NI_AESGCM (BLOCK_SIZE*16*2) +#define PRECOMP_DATA_SIZE_FAST2K (BLOCK_SIZE*128) /* // Useful macros @@ -139,13 +146,13 @@ struct _cpAES_GCM { #define AESGCM_VALID_ID(context) ((((context)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((context))) == (Ipp32u)idCtxAESGCM) #if 0 -__INLINE void IncrementCounter32(Ipp8u* pCtr) +__IPPCP_INLINE void IncrementCounter32(Ipp8u* pCtr) { int i; for(i=BLOCK_SIZE-1; i>=CTR_POS && 0==(Ipp8u)(++pCtr[i]); i--) ; } #endif -__INLINE void IncrementCounter32(Ipp8u* pCtr) +__IPPCP_INLINE void IncrementCounter32(Ipp8u* pCtr) { Ipp32u* pCtr32 = (Ipp32u*)pCtr; Ipp32u ctrVal = pCtr32[3]; @@ -156,6 +163,8 @@ __INLINE void IncrementCounter32(Ipp8u* pCtr) } #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) +#define AesGcmPrecompute_avx2_vaes OWNAPI(AesGcmPrecompute_avx2_vaes) + IPP_OWN_DECL (void, AesGcmPrecompute_avx2_vaes, (Ipp8u* pPrecomputeData, const Ipp8u* pHKey)) #define AesGcmPrecompute_avx OWNAPI(AesGcmPrecompute_avx) IPP_OWN_DECL (void, AesGcmPrecompute_avx, (Ipp8u* pPrecomputeData, const Ipp8u* pHKey)) #define AesGcmMulGcm_avx OWNAPI(AesGcmMulGcm_avx) @@ -217,7 +226,9 @@ static int cpSizeofCtx_AESGCM(void) int precomp_size; #if (_IPP>=_IPP_P8) || (_IPP32E>=_IPP32E_Y8) - if(IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL) || IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) + if (IsFeatureEnabled(ippCPUID_AVX2VAES|ippCPUID_AVX2VCLMUL)) + precomp_size = PRECOMP_DATA_SIZE_AVX2_VAES_AESGCM; + else if (IsFeatureEnabled(ippCPUID_AES|ippCPUID_CLMUL)) precomp_size 
= PRECOMP_DATA_SIZE_AES_NI_AESGCM; else #endif diff --git a/sources/ippcp/pcpaesauthgcm_avx512.h b/sources/ippcp/pcpaesauthgcm_avx512.h index 9a8cd569..d774ce97 100644 --- a/sources/ippcp/pcpaesauthgcm_avx512.h +++ b/sources/ippcp/pcpaesauthgcm_avx512.h @@ -18,7 +18,7 @@ // // Purpose: // Cryptography Primitive. -// AES GCM otimized for AVX512 and AVX512-VAES features +// AES GCM optimized for AVX512 and AVX512-VAES features // Internal Definitions // // @@ -98,10 +98,10 @@ struct _cpAES_GCM { EncryptUpdate_ encryptUpdateFunc; // Encryption-authentication DecryptUpdate_ decryptUpdateFunc; // Decryption-verification GetTag_ getTagFunc; // Get tag - + #if (_AES_PROB_NOISE == _FEATURE_ON_) __ALIGN16 - cpAESNoiseParams noiseParams; + cpAESNoiseParams noiseParams; #endif }; diff --git a/sources/ippcp/pcpaesgcmtbl2k_mulpx.c b/sources/ippcp/pcpaesgcmtbl2k_mulpx.c index fad70fd4..4e79658c 100644 --- a/sources/ippcp/pcpaesgcmtbl2k_mulpx.c +++ b/sources/ippcp/pcpaesgcmtbl2k_mulpx.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Encrypt/Decrypt byte data stream according to Rijndael128 (GCM mode) -// +// // "fast" stuff -// +// // Contents: // AesGcmMulGcm_table2K() -// +// */ @@ -55,7 +55,7 @@ typedef struct{ // // Ghash = Ghash * HKey mod G() */ -__INLINE Ipp16u getAesGcmConst_table_ct(int idx) +__IPPCP_INLINE Ipp16u getAesGcmConst_table_ct(int idx) { #define TBL_SLOTS_REP_READ (Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(AesGcmConst_table[0])) const BNU_CHUNK_T* TblEntry = (BNU_CHUNK_T*)AesGcmConst_table; @@ -137,13 +137,13 @@ void AesGcmMulGcm_table2K(Ipp8u* pGhash, const Ipp8u* pPrecomputeData, const voi // CTE version of AesGcmMulGcm_table2K() */ #if (_IPP_ARCH ==_IPP_ARCH_EM64T) -__INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp64u src2mask) +__IPPCP_INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp64u src2mask) { ((Ipp64u*)pDst)[0] = ((Ipp64u*)pSrc1)[0] ^ (((Ipp64u*)pSrc2)[0] & src2mask); ((Ipp64u*)pDst)[1] = ((Ipp64u*)pSrc1)[1] ^ (((Ipp64u*)pSrc2)[1] & src2mask); } #else /* IPP_ARCH == IPP_ARCH_IA32 */ -__INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp32u src2mask) +__IPPCP_INLINE void MaskedXorBlock16(const Ipp8u* pSrc1, const Ipp8u* pSrc2, Ipp8u* pDst, Ipp32u src2mask) { ((Ipp32u*)pDst)[0] = ((Ipp32u*)pSrc1)[0] ^ (((Ipp32u*)pSrc2)[0] & src2mask); ((Ipp32u*)pDst)[1] = ((Ipp32u*)pSrc1)[1] ^ (((Ipp32u*)pSrc2)[1] & src2mask); @@ -238,7 +238,7 @@ IPP_OWN_DEFN (void, AesGcmMulGcm_table2K_ct, (Ipp8u* pGhash, const Ipp8u* pPreco #if ((_IPP>=_IPP_V8) || (_IPP32E>=_IPP32E_N8)) -__INLINE Ipp16u getAesGcmConst_table_ct(int idx) +__IPPCP_INLINE Ipp16u getAesGcmConst_table_ct(int idx) { /* init current indexes */ __ALIGN16 Ipp16u idx_start[] = { 0,1,2,3,4,5,6,7 }; diff --git a/sources/ippcp/pcpaesm.h b/sources/ippcp/pcpaesm.h index ebf04ef6..9b220b3c 100644 --- a/sources/ippcp/pcpaesm.h +++ b/sources/ippcp/pcpaesm.h @@ -14,14 +14,14 @@ * limitations under the License. 
*************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Internal Definitions and // Internal AES Function Prototypes -// -// +// +// */ #if !defined(_PCP_AES_H) @@ -65,13 +65,13 @@ static int rij128nKeys[3] = {44, 52, 60 }; // helper for nRounds[] and estnKeys[] access // note: x is length in 32-bits words */ -__INLINE int rij_index(int x) +__IPPCP_INLINE int rij_index(int x) { return (x-NB(128))>>1; } /* size of AES context */ -__INLINE int cpSizeofCtx_AES(void) +__IPPCP_INLINE int cpSizeofCtx_AES(void) { return sizeof(IppsAESSpec); } diff --git a/sources/ippcp/pcpaesmxts.h b/sources/ippcp/pcpaesmxts.h index ea9d7682..a8eb0737 100644 --- a/sources/ippcp/pcpaesmxts.h +++ b/sources/ippcp/pcpaesmxts.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // AES-XTS Internal Definitions -// -// +// +// */ #if !defined(_PCP_AES_XTS_H) @@ -53,7 +53,7 @@ struct _cpAES_XTS #define VALID_AES_XTS_ID(ctx) ((((ctx)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((ctx))) == (Ipp32u)idCtxAESXTS) /* size of AES-XTS context */ -__INLINE int cpSizeof_AES_XTS_Ctx(void) +__IPPCP_INLINE int cpSizeof_AES_XTS_Ctx(void) { return sizeof(IppsAES_XTSSpec); } diff --git a/sources/ippcp/pcpaesmxtsstuff.h b/sources/ippcp/pcpaesmxtsstuff.h index 41ab30de..185d54c4 100644 --- a/sources/ippcp/pcpaesmxtsstuff.h +++ b/sources/ippcp/pcpaesmxtsstuff.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// AES-XTS Internal Functions -// -// +// +// */ #if !defined(_PCP_AES_XTS_STUFF_H) @@ -39,7 +39,7 @@ #define GF_MASK (0x8000000000000000) #define GF_POLY (0x0000000000000087) -__INLINE void gf_mul_by_primitive(void* x) +__IPPCP_INLINE void gf_mul_by_primitive(void* x) { Ipp64u* x64 = (Ipp64u*)x; Ipp64u xorL = ((Ipp64s)x64[1] >> 63) & GF_POLY; diff --git a/sources/ippcp/pcpaesnoise.h b/sources/ippcp/pcpaesnoise.h index 6fc0c9bc..986c5dc0 100644 --- a/sources/ippcp/pcpaesnoise.h +++ b/sources/ippcp/pcpaesnoise.h @@ -17,9 +17,9 @@ #if !defined(_PCP_AES_NOISE_H) #define _PCP_AES_NOISE_H -/* +/* * The parameters below are empirical and chosen in advance to guarantee - * the high level of security protection against Mistletoe3 attack. + * the high level of security protection against Mistletoe3 attack. */ #define MISTLETOE3_MAX_CHUNK_SIZE (16000) /* maximum chunks size allowed to be processed without noise injection (in bytes) \ 16000 bytes = 16*1000 bytes = 1000 AES blocks */ @@ -47,7 +47,7 @@ typedef struct _cpAESNoiseParams { #define AES_NOISE_LEVEL(ctx) ((ctx)->noiseLevel) /* size of _cpAESNoiseParams structure */ -__INLINE int cpSizeofNoise_Params(void) +__IPPCP_INLINE int cpSizeofNoise_Params(void) { return sizeof(cpAESNoiseParams); } diff --git a/sources/ippcp/pcpbn.h b/sources/ippcp/pcpbn.h index fdd53d5d..c2e5d6f4 100644 --- a/sources/ippcp/pcpbn.h +++ b/sources/ippcp/pcpbn.h @@ -14,12 +14,12 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippcp) -// -// -// +// +// +// */ #if !defined(_CP_BN_H) @@ -70,7 +70,7 @@ struct _cpBigNum IPP_OWN_DECL (void, cpUnpackBigNumCtx, (const Ipp8u* pBuffer, IppsBigNumState* pBN)) /* copy BN */ -__INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState* pSrc) +__IPPCP_INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState* pSrc) { BN_SIGN(pDst) = BN_SIGN(pSrc); BN_SIZE(pDst) = BN_SIZE(pSrc); @@ -78,7 +78,7 @@ __INLINE IppsBigNumState* cpBN_copy(IppsBigNumState* pDst, const IppsBigNumState return pDst; } /* set BN to zero */ -__INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -86,7 +86,7 @@ __INLINE IppsBigNumState* cpBN_zero(IppsBigNumState* pBN) return pBN; } /* fixup BN */ -__INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) { cpSize len = BN_SIZE(pBN); FIX_BNU(BN_NUMBER(pBN), len); @@ -94,7 +94,7 @@ __INLINE IppsBigNumState* cpBN_fix(IppsBigNumState* pBN) return pBN; } /* set BN to chunk */ -__INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) +__IPPCP_INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -103,7 +103,7 @@ __INLINE IppsBigNumState* cpBN_chunk(IppsBigNumState* pBN, BNU_CHUNK_T a) return pBN; } /* set BN to 2^m */ -__INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) +__IPPCP_INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) { cpSize size = BITS_BNU_CHUNK(power+1); if(BN_ROOM(pBN) >= size) { @@ -117,14 +117,14 @@ __INLINE IppsBigNumState* cpBN_power2(IppsBigNumState* pBN, int power) } /* bitsize of BN */ -__INLINE int 
cpBN_bitsize(const IppsBigNumState* pA) +__IPPCP_INLINE int cpBN_bitsize(const IppsBigNumState* pA) { int bitsize = BITSIZE_BNU(BN_NUMBER(pA), BN_SIZE(pA)); return bitsize; } /* returns -1/0/+1 depemding on A~B comparison */ -__INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) +__IPPCP_INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) { IppsBigNumSGN signA = BN_SIGN(pA); IppsBigNumSGN signB = BN_SIGN(pB); @@ -137,7 +137,7 @@ __INLINE int cpBN_cmp(const IppsBigNumState* pA, const IppsBigNumState* pB) } /* returns -1/0/+1 depemding on A comparison 00 */ -__INLINE int cpBN_tst(const IppsBigNumState* pA) +__IPPCP_INLINE int cpBN_tst(const IppsBigNumState* pA) { if(1==BN_SIZE(pA) && 0==BN_NUMBER(pA)[0]) return 0; @@ -146,17 +146,17 @@ __INLINE int cpBN_tst(const IppsBigNumState* pA) } -// some addtition functions -__INLINE int IsZero_BN(const IppsBigNumState* pA) +// some addition functions +__IPPCP_INLINE int IsZero_BN(const IppsBigNumState* pA) { return ( BN_SIZE(pA)==1 ) && ( BN_NUMBER(pA)[0]==0 ); } -__INLINE int IsOdd_BN(const IppsBigNumState* pA) +__IPPCP_INLINE int IsOdd_BN(const IppsBigNumState* pA) { return BN_NUMBER(pA)[0] & 1; } -__INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) +__IPPCP_INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = 1; @@ -164,14 +164,14 @@ __INLINE IppsBigNumState* BN_Word(IppsBigNumState* pBN, BNU_CHUNK_T w) BN_NUMBER(pBN)[0] = w; return pBN; } -__INLINE IppsBigNumState* BN_Set(const BNU_CHUNK_T* pData, cpSize len, IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* BN_Set(const BNU_CHUNK_T* pData, cpSize len, IppsBigNumState* pBN) { BN_SIGN(pBN) = ippBigNumPOS; BN_SIZE(pBN) = len; ZEXPAND_COPY_BNU(BN_NUMBER(pBN), BN_ROOM(pBN), pData, len); return pBN; } -__INLINE IppsBigNumState* BN_Make(BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer, cpSize len, IppsBigNumState* pBN) +__IPPCP_INLINE IppsBigNumState* 
BN_Make(BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer, cpSize len, IppsBigNumState* pBN) { BN_SET_ID(pBN); BN_SIGN(pBN) = ippBigNumPOS; diff --git a/sources/ippcp/pcpbninit.c b/sources/ippcp/pcpbninit.c index 2b249a66..b4c86766 100644 --- a/sources/ippcp/pcpbninit.c +++ b/sources/ippcp/pcpbninit.c @@ -57,7 +57,7 @@ IPPFUN(IppStatus, ippsBigNumInit, (int length, IppsBigNumState* pBN)) cpSize len = INTERNAL_BNU_LENGTH(length); BN_SIGN(pBN) = ippBigNumPOS; - BN_SIZE(pBN) = 1; /* initial valie is zero */ + BN_SIZE(pBN) = 1; /* initial value is zero */ BN_ROOM(pBN) = len; /* close to what has been passed by user */ /* reserve one BNU_CHUNK_T more for cpDiv_BNU, diff --git a/sources/ippcp/pcpbnu32_arith_sub.c b/sources/ippcp/pcpbnu32_arith_sub.c index 6ca517b9..dbdaddb5 100644 --- a/sources/ippcp/pcpbnu32_arith_sub.c +++ b/sources/ippcp/pcpbnu32_arith_sub.c @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* +/* // Purpose: // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Internal BNU32 arithmetic. -// +// // Contents: // cpSub_BNU32() -// +// */ #include "owncp.h" @@ -33,7 +33,7 @@ /*F* // Name: cpSub_BNU32 // -// Purpose: substract BNU32. +// Purpose: subtract BNU32. 
// // Returns: // borrow diff --git a/sources/ippcp/pcpbnu32misc.h b/sources/ippcp/pcpbnu32misc.h index 308c41c7..c07d26d2 100644 --- a/sources/ippcp/pcpbnu32misc.h +++ b/sources/ippcp/pcpbnu32misc.h @@ -34,7 +34,7 @@ #define cpNLZ_BNU32 OWNAPI(cpNLZ_BNU32) IPP_OWN_DECL (cpSize, cpNLZ_BNU32, (Ipp32u x)) #else - __INLINE cpSize cpNLZ_BNU32(Ipp32u x) + __IPPCP_INLINE cpSize cpNLZ_BNU32(Ipp32u x) { return (cpSize)_lzcnt_u32(x); } @@ -52,7 +52,7 @@ // nsA size of BNU // */ -__INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) +__IPPCP_INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) { Ipp32u zscan = (Ipp32u)(-1); int outLen = nsA; @@ -67,7 +67,7 @@ __INLINE int cpFix_BNU32(const Ipp32u* pA, int nsA) /* most significant BNU bit */ #if 0 -__INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) +__IPPCP_INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) { FIX_BNU(pA, nsA); return nsA*BITSIZE(Ipp32u) - cpNLZ_BNU32(pA[nsA-1]) -1; @@ -75,7 +75,7 @@ __INLINE int cpMSBit_BNU32(const Ipp32u* pA, cpSize nsA) #endif #if 0 -__INLINE int cpCmp_BNU32(const Ipp32u* pA, cpSize nsA, const Ipp32u* pB, cpSize nsB) +__IPPCP_INLINE int cpCmp_BNU32(const Ipp32u* pA, cpSize nsA, const Ipp32u* pB, cpSize nsB) { if(nsA!=nsB) return nsA>nsB? 1 : -1; diff --git a/sources/ippcp/pcpbnuarith.h b/sources/ippcp/pcpbnuarith.h index dd36a0f5..1cf5a820 100644 --- a/sources/ippcp/pcpbnuarith.h +++ b/sources/ippcp/pcpbnuarith.h @@ -14,12 +14,12 @@ * limitations under the License. *************************************************************************/ -/* +/* // Purpose: // Intel(R) Integrated Performance Primitives. 
// Internal Unsigned internal arithmetic -// -// +// +// */ #if !defined(_CP_BNU_ARITH_H) @@ -60,7 +60,7 @@ // *F*/ -__INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) { @@ -94,7 +94,7 @@ __INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR, // *F*/ -__INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA) +__IPPCP_INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA) { #if(_ADCOX_NI_ENABLING_==_FEATURE_ON_) return cpSqrAdx_BNU_school(pR, pA,nsA); @@ -114,7 +114,7 @@ __INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, /* // multiplication/squaring wrappers */ -__INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB, BNU_CHUNK_T* pBuffer) @@ -122,7 +122,7 @@ __INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR, IPP_UNREFERENCED_PARAMETER(pBuffer); return cpMul_BNU_school(pR, pA,nsA, pB,nsB); } -__INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, +__IPPCP_INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA, BNU_CHUNK_T* pBuffer) { @@ -148,7 +148,7 @@ __INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR, // *F*/ -__INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize nsA, BNU_CHUNK_T* pB, cpSize nsB) +__IPPCP_INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize nsA, BNU_CHUNK_T* pB, cpSize nsB) { int nsR = cpDiv_BNU32((Ipp32u*)pQ, pnsQ, (Ipp32u*)pA, nsA*(Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u)), @@ -180,7 +180,7 @@ __INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize // *F*/ -__INLINE cpSize cpMod_BNU(BNU_CHUNK_T* pX, cpSize nsX, BNU_CHUNK_T* pModulus, cpSize nsM) +__IPPCP_INLINE cpSize cpMod_BNU(BNU_CHUNK_T* pX, cpSize nsX, 
BNU_CHUNK_T* pModulus, cpSize nsM) { return cpDiv_BNU(NULL,NULL, pX,nsX, pModulus, nsM); } diff --git a/sources/ippcp/pcpbnumisc.h b/sources/ippcp/pcpbnumisc.h index 7656ddaf..33b04a82 100644 --- a/sources/ippcp/pcpbnumisc.h +++ b/sources/ippcp/pcpbnumisc.h @@ -67,10 +67,10 @@ /* copy and set */ -__INLINE void cpCpy_BNU(BNU_CHUNK_T* pDst, const BNU_CHUNK_T* pSrc, cpSize ns) +__IPPCP_INLINE void cpCpy_BNU(BNU_CHUNK_T* pDst, const BNU_CHUNK_T* pSrc, cpSize ns) { COPY_BNU(pDst, pSrc, ns); } -__INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) +__IPPCP_INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) { ZEXPAND_BNU(pDst, 0, ns); pDst[0] = val; @@ -90,7 +90,7 @@ __INLINE void cpSet_BNU(BNU_CHUNK_T* pDst, cpSize ns, BNU_CHUNK_T val) // nsA Size of pA // */ -__INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) +__IPPCP_INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) { BNU_CHUNK_T zscan = (BNU_CHUNK_T)(-1); int outLen = nsA; @@ -120,7 +120,7 @@ __INLINE int cpFix_BNU(const BNU_CHUNK_T* pA, int nsA) // */ #if 0 -__INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) +__IPPCP_INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB) { if(nsA!=nsB) return nsA>nsB? 
1 : -1; @@ -133,7 +133,7 @@ __INLINE int cpCmp_BNU(const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, } #endif -__INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) +__IPPCP_INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) { const Ipp32u* a32 = (const Ipp32u*)a; const Ipp32u* b32 = (const Ipp32u*)b; @@ -153,7 +153,7 @@ __INLINE int cpCmp_BNU0(const BNU_CHUNK_T* a, const BNU_CHUNK_T* b, int len) return (int)(resb|resd); } -__INLINE int cpCmp_BNU(const BNU_CHUNK_T* a, int aLen, const BNU_CHUNK_T* b, int bLen) +__IPPCP_INLINE int cpCmp_BNU(const BNU_CHUNK_T* a, int aLen, const BNU_CHUNK_T* b, int bLen) { BNU_CHUNK_T aLen_eq_bLen = cpIsZero_ct((BNU_CHUNK_T)(aLen-bLen)); // FFFF/0000 if (aLen=bLen) / (aLen!=bLen) BNU_CHUNK_T aLen_gt_bLen = cpIsMsb_ct((BNU_CHUNK_T)(bLen-aLen)) & 1; // 1/0 if (aLen>bLen) / (aLen0, if A > 0 // <0, looks like impossible (or error) case */ -__INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) +__IPPCP_INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) { for(; (nsA>0) && (0==pA[nsA-1]); nsA--) ; return nsA; @@ -208,7 +208,7 @@ __INLINE int cpTst_BNU(const BNU_CHUNK_T* pA, int nsA) #define cpNLZ_BNU OWNAPI(cpNLZ_BNU) IPP_OWN_DECL (cpSize, cpNLZ_BNU, (BNU_CHUNK_T x)) #else - __INLINE cpSize cpNLZ_BNU(BNU_CHUNK_T x) + __IPPCP_INLINE cpSize cpNLZ_BNU(BNU_CHUNK_T x) { #if (BNU_CHUNK_BITS == BNU_CHUNK_64BIT) return (cpSize)_lzcnt_u64(x); diff --git a/sources/ippcp/pcpdescipherm.c b/sources/ippcp/pcpdescipherm.c index 1bf9de80..a07cfcaa 100644 --- a/sources/ippcp/pcpdescipherm.c +++ b/sources/ippcp/pcpdescipherm.c @@ -14,19 +14,19 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// DES Cipher function (MemJam mitigation included) -// +// // Contents: // initial permutation: ip() // final permutation: fp() // round function: rndm() -// DES block encypt/decrypt: Chipher_DES() -// -// +// DES block encrypt/decrypt: Chipher_DES() +// +// */ diff --git a/sources/ippcp/pcpdlpgeneratedh.c b/sources/ippcp/pcpdlpgeneratedh.c index 9629a71d..88f5aac2 100644 --- a/sources/ippcp/pcpdlpgeneratedh.c +++ b/sources/ippcp/pcpdlpgeneratedh.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // DL over Prime Finite Field (generate domain parameters) -// +// // Contents: // ippsDLPGenerateDH() // ippsDLPGenerateDSA() -// -// +// +// */ #include "owndefs.h" @@ -103,7 +103,7 @@ IPPFUN(IppStatus, ippsDLPGenerateDH,(const IppsBigNumState* pSeedIn, IppsBigNumState* pSeed1 = cpBigNumListGet(&pList); IppsBigNumState* pSeed2 = cpBigNumListGet(&pList); - /* interally generates SeedIn value by default */ + /* internally generates SeedIn value by default */ IppBool seed_is_random = ippTrue; int seedBitSize = DLP_BITSIZER(pDL); diff --git a/sources/ippcp/pcpdlpgeneratedsa.c b/sources/ippcp/pcpdlpgeneratedsa.c index be1b559d..9bfb7855 100644 --- a/sources/ippcp/pcpdlpgeneratedsa.c +++ b/sources/ippcp/pcpdlpgeneratedsa.c @@ -109,7 +109,7 @@ IPPFUN(IppStatus, ippsDLPGenerateDSA,(const IppsBigNumState* pSeedIn, IppsBigNumState* pSeed = cpBigNumListGet(&pList); - /* interally generates SeedIn value */ + /* internally generates SeedIn value */ int seedBitSize = MIN_DLPDSA_SEEDSIZE; IppBool seed_is_random = ippTrue; diff --git a/sources/ippcp/pcpeccp.h b/sources/ippcp/pcpeccp.h index aa108544..311c9fd3 100644 --- a/sources/ippcp/pcpeccp.h +++ b/sources/ippcp/pcpeccp.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal ECC (prime) basic Definitions & Function Prototypes -// -// +// +// */ #if !defined(_NEW_PCP_ECCP_H) @@ -29,7 +29,7 @@ #include "pcpgfpecstuff.h" -__INLINE IppsBigNumState* cpConstructBN(IppsBigNumState* pBN, cpSize len, BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer) +__IPPCP_INLINE IppsBigNumState* cpConstructBN(IppsBigNumState* pBN, cpSize len, BNU_CHUNK_T* pData, BNU_CHUNK_T* pBuffer) { BN_SET_ID(pBN); BN_SIGN(pBN) = ippBigNumPOS; @@ -164,7 +164,7 @@ extern const BNU_CHUNK_T h_secp384r1_p[]; extern const BNU_CHUNK_T h_secp521r1_p[]; extern const BNU_CHUNK_T h_tpmSM2_p256_p[]; -__INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pM, int ns, BNU_CHUNK_T* pBuffer) @@ -175,7 +175,7 @@ __INLINE BNU_CHUNK_T* cpModAdd_BNU(BNU_CHUNK_T* pR, return pR; } -__INLINE BNU_CHUNK_T* cpModSub_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE BNU_CHUNK_T* cpModSub_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pM, int ns, BNU_CHUNK_T* pBuffer) diff --git a/sources/ippcp/pcpgfpecessm2.h b/sources/ippcp/pcpgfpecessm2.h index fd56c876..5fd1a28a 100644 --- a/sources/ippcp/pcpgfpecessm2.h +++ b/sources/ippcp/pcpgfpecessm2.h @@ -54,7 +54,7 @@ struct _cpStateECES_SM2 { #define VALID_ECES_SM2_ID(stt) ((((stt)->idCtx) ^ (Ipp32u)IPP_UINT_PTR((stt))) == (Ipp32u)idxCtxECES_SM2) /* get a byte, update 0-kdf status */ -__INLINE Ipp8u cpECES_SM2KdfNextByte(IppsECESState_SM2* pState) { +__IPPCP_INLINE Ipp8u cpECES_SM2KdfNextByte(IppsECESState_SM2* pState) { if (pState->kdfIndex == IPP_SM3_DIGEST_BITSIZE / BYTESIZE) { ++pState->kdfCounter; pState->kdfIndex = 0; diff --git a/sources/ippcp/pcpgfpecstuff.h b/sources/ippcp/pcpgfpecstuff.h index 79a593c9..4d432ae9 100644 --- a/sources/ippcp/pcpgfpecstuff.h +++ b/sources/ippcp/pcpgfpecstuff.h @@ -188,22 +188,22 @@ IPP_OWN_DECL (const cpPrecompAP*, gfpec_precom_sm2_radix52_fun, (void)) /* // 
get/release n points from/to the pool */ -__INLINE BNU_CHUNK_T* cpEcGFpGetPool(int n, IppsGFpECState* pEC) +__IPPCP_INLINE BNU_CHUNK_T* cpEcGFpGetPool(int n, IppsGFpECState* pEC) { BNU_CHUNK_T* pPool = ECP_POOL(pEC); ECP_POOL(pEC) += n*GFP_FELEN(GFP_PMA(ECP_GFP(pEC)))*3; return pPool; } -__INLINE void cpEcGFpReleasePool(int n, IppsGFpECState* pEC) +__IPPCP_INLINE void cpEcGFpReleasePool(int n, IppsGFpECState* pEC) { int chunk_size = n*GFP_FELEN(GFP_PMA(ECP_GFP(pEC)))*3; ECP_POOL(pEC) -= chunk_size; - // Clean the pool for the security reasons + // Clean the pool for the security reasons // (intermediate sensitive data may be stored here) - ZEXPAND_BNU(ECP_POOL(pEC), 0, chunk_size); + ZEXPAND_BNU(ECP_POOL(pEC), 0, chunk_size); } -__INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* pData, int flags, const IppsGFpECState* pEC) +__IPPCP_INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* pData, int flags, const IppsGFpECState* pEC) { ECP_POINT_SET_ID(pPoint); ECP_POINT_FLAGS(pPoint) = flags; @@ -213,7 +213,7 @@ __INLINE IppsGFpECPoint* cpEcGFpInitPoint(IppsGFpECPoint* pPoint, BNU_CHUNK_T* p } /* copy one point into another */ -__INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpECPoint* pPointA, int elemLen) +__IPPCP_INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpECPoint* pPointA, int elemLen) { cpGFpElementCopy(ECP_POINT_DATA(pPointR), ECP_POINT_DATA(pPointA), 3*elemLen); ECP_POINT_FLAGS(pPointR) = ECP_POINT_FLAGS(pPointA); @@ -221,7 +221,7 @@ __INLINE IppsGFpECPoint* gfec_CopyPoint(IppsGFpECPoint* pPointR, const IppsGFpEC } -__INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) +__IPPCP_INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) { int elemLen = ECP_POINT_FELEN(pPoint); cpGFpElementPad(ECP_POINT_X(pPoint), elemLen, 0); @@ -235,7 +235,7 @@ __INLINE IppsGFpECPoint* gfec_SetPointAtInfinity(IppsGFpECPoint* pPoint) // 
test infinity: // IsProjectivePointAtInfinity */ -__INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) +__IPPCP_INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) { return GFP_IS_ZERO( ECP_POINT_Z(pPoint), ECP_POINT_FELEN(pPoint)); } @@ -243,7 +243,7 @@ __INLINE int gfec_IsPointAtInfinity(const IppsGFpECPoint* pPoint) /* signed encode */ -__INLINE void booth_recode(Ipp8u* sign, Ipp8u* digit, Ipp8u in, int w) +__IPPCP_INLINE void booth_recode(Ipp8u* sign, Ipp8u* digit, Ipp8u in, int w) { Ipp8u s = (Ipp8u)(~((in >> w) - 1)); int d = (1 << (w+1)) - in - 1; @@ -288,7 +288,7 @@ IPP_OWN_DECL (int, gfec_MakePoint, (IppsGFpECPoint* pPoint, const BNU_CHUNK_T* p IPP_OWN_DECL (int, gfec_ComparePoint, (const IppsGFpECPoint* pP, const IppsGFpECPoint* pQ, IppsGFpECState* pEC)) IPP_OWN_DECL (int, gfec_IsPointOnCurve, (const IppsGFpECPoint* pP, IppsGFpECState* pEC)) -__INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, +__IPPCP_INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, const IppsGFpECPoint* pP, IppsGFpECState* pEC) { gfec_point_double(ECP_POINT_X(pR), ECP_POINT_X(pP), pEC); @@ -296,7 +296,7 @@ __INLINE IppsGFpECPoint* gfec_DblPoint(IppsGFpECPoint* pR, return pR; } -__INLINE IppsGFpECPoint* gfec_AddPoint(IppsGFpECPoint* pR, +__IPPCP_INLINE IppsGFpECPoint* gfec_AddPoint(IppsGFpECPoint* pR, const IppsGFpECPoint* pP, const IppsGFpECPoint* pQ, IppsGFpECState* pEC) { diff --git a/sources/ippcp/pcpgfpstuff.h b/sources/ippcp/pcpgfpstuff.h index d7adeb84..17aca720 100644 --- a/sources/ippcp/pcpgfpstuff.h +++ b/sources/ippcp/pcpgfpstuff.h @@ -89,24 +89,24 @@ typedef struct _cpGFp { #define cpGFpReleasePool(n, gfe) gsModPoolFree((gfe), (n)) -__INLINE int cpGFpElementLen(const BNU_CHUNK_T* pE, int nsE) +__IPPCP_INLINE int cpGFpElementLen(const BNU_CHUNK_T* pE, int nsE) { for(; nsE>1 && 0==pE[nsE-1]; nsE--) ; return nsE; } -__INLINE BNU_CHUNK_T* cpGFpElementCopy(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pE, int nsE) +__IPPCP_INLINE BNU_CHUNK_T* 
cpGFpElementCopy(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pE, int nsE) { int n; for(n=0; nadd(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpSub(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpSub(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->sub(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpNeg(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpNeg(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->neg(pR, pA, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpMul(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpMul(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->mul(pR, pA, pB, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpSqr(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpSqr(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->sqr(pR, pA, pGFE); } -__INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) +__IPPCP_INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pGFE) { return GFP_METHOD(pGFE)->div2(pR, pA, pGFE); } @@ -169,7 +169,7 @@ __INLINE BNU_CHUNK_T* cpGFpHalve(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEn /* construct GF element */ -__INLINE IppsGFpElement* cpGFpElementConstruct(IppsGFpElement* pR, BNU_CHUNK_T* pDataBufer, int ns) +__IPPCP_INLINE IppsGFpElement* cpGFpElementConstruct(IppsGFpElement* pR, BNU_CHUNK_T* pDataBufer, int ns) { GFPE_SET_ID(pR); GFPE_ROOM(pR) = ns; diff --git a/sources/ippcp/pcpgfpxinit.c b/sources/ippcp/pcpgfpxinit.c index 51e8bb9a..0b0e8430 100644 --- a/sources/ippcp/pcpgfpxinit.c +++ b/sources/ippcp/pcpgfpxinit.c @@ -14,10 +14,10 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Operations over GF(p) ectension. -// +// // Context: // pcpgfpxinit.c() // @@ -51,7 +51,7 @@ // (IPP_MIN_GF_EXTDEG==2, IPP_MAX_GF_EXTDEG==8) // 1>nElm || nElm>extDeg // -// cpID_Poly!=pGFpMethod->modulusID -- method does not refferenced to polynomial one +// cpID_Poly!=pGFpMethod->modulusID -- method does not reference the polynomial one // pGFpMethod->modulusBitDeg!=extDeg -- fixed method does not match to degree extension // // ippStsNoErr no error diff --git a/sources/ippcp/pcpgfpxinitbinomial.c b/sources/ippcp/pcpgfpxinitbinomial.c index 96dd34f1..062a4cf7 100644 --- a/sources/ippcp/pcpgfpxinitbinomial.c +++ b/sources/ippcp/pcpgfpxinitbinomial.c @@ -14,10 +14,10 @@ * limitations under the License. *************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives. Cryptography Primitives. // Operations over GF(p) ectension. -// +// // Context: // pcpgfpxinitbinomial.c() // @@ -49,7 +49,7 @@ // ippStsBadArgErr IPP_MIN_GF_EXTDEG > extDeg || extDeg > IPP_MAX_GF_EXTDEG // (IPP_MIN_GF_EXTDEG==2, IPP_MAX_GF_EXTDEG==8) // -// cpID_Poly!=pGFpMethod->modulusID -- method does not refferenced to polynomial one +// cpID_Poly!=pGFpMethod->modulusID -- method does not reference the polynomial one // pGFpMethod->modulusBitDeg!=extDeg -- fixed method does not match to degree extension // // ippStsNoErr no error diff --git a/sources/ippcp/pcpgfpxmethod_binom_epid2.c b/sources/ippcp/pcpgfpxmethod_binom_epid2.c index 2798decf..0daaeda5 100644 --- a/sources/ippcp/pcpgfpxmethod_binom_epid2.c +++ b/sources/ippcp/pcpgfpxmethod_binom_epid2.c @@ -57,7 +57,7 @@ // The case is important in GF(((p^2)^3)^2) arithmetic for Intel(R) EPID 2.0. 
// */ -__INLINE BNU_CHUNK_T* cpFq6Mul_vi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) +__IPPCP_INLINE BNU_CHUNK_T* cpFq6Mul_vi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) { gsEngine* pGroundGFE = GFP_PARENT(pGFEx); int termLen = GFP_FELEN(pGroundGFE); @@ -250,7 +250,7 @@ static gsModMethod* gsPolyArith_binom2_epid2 (void) // // Purpose: Returns a reference to the implementation of arithmetic operations over GF(pd). // -// Returns: pointer to a structure containing +// Returns: pointer to a structure containing // an implementation of arithmetic operations over GF(pd) // g(x) = x^2 - a0, a0 from GF(q), a0 = 1 // g(w) = w^2 - V0, v0 from GF((q^2)^3), V0 = 0*s^2 + v + 0 diff --git a/sources/ippcp/pcpgfpxmethod_binom_epid2.h b/sources/ippcp/pcpgfpxmethod_binom_epid2.h index 2b097c98..1f492b71 100644 --- a/sources/ippcp/pcpgfpxmethod_binom_epid2.h +++ b/sources/ippcp/pcpgfpxmethod_binom_epid2.h @@ -57,7 +57,7 @@ // The case is important in GF((p^2)^3) arithmetic for Intel(R) EPID 2.0. // */ -__INLINE BNU_CHUNK_T* cpFq2Mul_xi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) +__IPPCP_INLINE BNU_CHUNK_T* cpFq2Mul_xi(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsEngine* pGFEx) { gsEngine* pGroundGFE = GFP_PARENT(pGFEx); mod_mul addF = GFP_METHOD(pGroundGFE)->add; diff --git a/sources/ippcp/pcpgfpxstuff.h b/sources/ippcp/pcpgfpxstuff.h index b5d63881..d892e3c5 100644 --- a/sources/ippcp/pcpgfpxstuff.h +++ b/sources/ippcp/pcpgfpxstuff.h @@ -14,11 +14,11 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippCP) // GF(p) extension internal -// +// */ #if !defined(_PCP_GFPEXT_H_) @@ -35,7 +35,7 @@ #define GFPX_IDX_ELEMENT(pxe, idx, eleSize) ((pxe)+(eleSize)*(idx)) -__INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) +__IPPCP_INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) { int groundElemLen = GFP_FELEN(GFP_PARENT(pGFEx)); int deg; @@ -45,14 +45,14 @@ __INLINE int degree(const BNU_CHUNK_T* pE, const gsModEngine* pGFEx) return deg; } -__INLINE gsModEngine* cpGFpBasic(const gsModEngine* pGFEx) +__IPPCP_INLINE gsModEngine* cpGFpBasic(const gsModEngine* pGFEx) { while( !GFP_IS_BASIC(pGFEx) ) { pGFEx = GFP_PARENT(pGFEx); } return (gsModEngine*)pGFEx; } -__INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) { int degree = GFP_EXTDEGREE(pGFEx); while( !GFP_IS_BASIC(pGFEx) ) { @@ -65,7 +65,7 @@ __INLINE int cpGFpBasicDegreeExtension(const gsModEngine* pGFEx) /* convert external data (Ipp32u) => internal element (BNU_CHUNK_T) representation returns length of element (in BNU_CHUNK_T) */ -__INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, const gsModEngine* pGFEx) { gsModEngine* pBasicGFE = cpGFpBasic(pGFEx); int basicExtension = cpGFpBasicDegreeExtension(pGFEx); @@ -84,7 +84,7 @@ __INLINE int cpGFpxCopyToChunk(BNU_CHUNK_T* pElm, const Ipp32u* pA, int nsA, con /* convert internal element (BNU_CHUNK_T) => external data (Ipp32u) representation returns length of data (in Ipp32u) */ -__INLINE int cpGFpxCopyFromChunk(Ipp32u* pA, const BNU_CHUNK_T* pElm, const gsModEngine* pGFEx) +__IPPCP_INLINE int cpGFpxCopyFromChunk(Ipp32u* pA, const BNU_CHUNK_T* pElm, const gsModEngine* 
pGFEx) { gsModEngine* pBasicGFE = cpGFpBasic(pGFEx); int basicExtension = cpGFpBasicDegreeExtension(pGFEx); diff --git a/sources/ippcp/pcphash.h b/sources/ippcp/pcphash.h index bf24edf7..dcc1ba75 100644 --- a/sources/ippcp/pcphash.h +++ b/sources/ippcp/pcphash.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Security Hash Standard // Internal Definitions and Internal Functions Prototypes -// -// +// +// */ #if !defined(_PCP_HASH_H) @@ -183,19 +183,19 @@ extern const Ipp8u* cpHashIV[]; extern const cpHashAttr cpHashAlgAttr[]; /* IV size helper */ -__INLINE int cpHashIvSize(IppHashAlgId algID) +__IPPCP_INLINE int cpHashIvSize(IppHashAlgId algID) { return cpHashAlgAttr[algID].ivSize; } /* hash size helper */ -__INLINE int cpHashSize(IppHashAlgId algID) +__IPPCP_INLINE int cpHashSize(IppHashAlgId algID) { return cpHashAlgAttr[algID].hashSize; } /* message block size helper */ -__INLINE int cpHashMBS(IppHashAlgId algID) +__IPPCP_INLINE int cpHashMBS(IppHashAlgId algID) { return cpHashAlgAttr[algID].msgBlkSize; } /* maps algID into enabled IppHashAlgId value */ -__INLINE IppHashAlgId cpValidHashAlg(IppHashAlgId algID) +__IPPCP_INLINE IppHashAlgId cpValidHashAlg(IppHashAlgId algID) { /* maps algID into the valid range */ algID = (((int)ippHashAlg_Unknown < (int)algID) && ((int)algID < (int)ippHashAlg_MaxNo))? algID : ippHashAlg_Unknown; diff --git a/sources/ippcp/pcphashcnt.c b/sources/ippcp/pcphashcnt.c index 4456b452..7bd0b8de 100644 --- a/sources/ippcp/pcphashcnt.c +++ b/sources/ippcp/pcphashcnt.c @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Security Hash Standard // Constants -// -// +// +// */ #include "owndefs.h" @@ -219,7 +219,7 @@ const Ipp8u* cpHashIV[] = { //////////////////////////////////////////////////////////// /* -// additive constatns +// additive constants */ #if defined(_ENABLE_ALG_SHA1_) __ALIGN16 const Ipp32u SHA1_cnt[] = { diff --git a/sources/ippcp/pcphashsha1px.c b/sources/ippcp/pcphashsha1px.c index 34b9fedd..31b7476c 100644 --- a/sources/ippcp/pcphashsha1px.c +++ b/sources/ippcp/pcphashsha1px.c @@ -14,16 +14,16 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Message block processing according to SHA1 -// +// // Contents: // UpdateSHA1() -// -// +// +// */ #include "owndefs.h" @@ -62,7 +62,7 @@ } #if defined(_ALG_SHA1_COMPACT_) -__INLINE Ipp32u MagicFun(int s, Ipp32u b, Ipp32u c, Ipp32u d) +__IPPCP_INLINE Ipp32u MagicFun(int s, Ipp32u b, Ipp32u c, Ipp32u d) { switch(s) { case 0: return MAGIC_F0(b,c,d); diff --git a/sources/ippcp/pcphashsm3px.c b/sources/ippcp/pcphashsm3px.c index c5392180..e8267039 100644 --- a/sources/ippcp/pcphashsm3px.c +++ b/sources/ippcp/pcphashsm3px.c @@ -14,16 +14,16 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Message block processing according to SM5 -// +// // Contents: // UpdateSM3() -// -// +// +// */ #include "owndefs.h" @@ -141,14 +141,14 @@ *F*/ #if defined(_ALG_SM3_COMPACT_) -__INLINE Ipp32u MagicFF(int s, Ipp32u a, Ipp32u b, Ipp32u c) +__IPPCP_INLINE Ipp32u MagicFF(int s, Ipp32u a, Ipp32u b, Ipp32u c) { switch(s) { case 0: return FF1(a,b,c); default:return FF2(a,b,c); } } -__INLINE Ipp32u MagicGG(int s, Ipp32u e, Ipp32u f, Ipp32u g) +__IPPCP_INLINE Ipp32u MagicGG(int s, Ipp32u e, Ipp32u f, Ipp32u g) { switch(s) { case 0: return GG1(e,f,g); diff --git a/sources/ippcp/pcphashupdate.c b/sources/ippcp/pcphashupdate.c index eb2b5642..940a3079 100644 --- a/sources/ippcp/pcphashupdate.c +++ b/sources/ippcp/pcphashupdate.c @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Security Hash Standard // General Functionality -// +// // Contents: // ippsHashUpdate() // @@ -50,7 +50,7 @@ // pState pointer to the Hash context // *F*/ -__INLINE int IsExceedMsgLen(Ipp64u maxLo, Ipp64u maxHi, Ipp64u lenLo, Ipp64u lenHi) +__IPPCP_INLINE int IsExceedMsgLen(Ipp64u maxLo, Ipp64u maxHi, Ipp64u lenLo, Ipp64u lenHi) { int isExceed = lenLo > maxLo; isExceed = (lenHi+(Ipp64u)isExceed) > maxHi; diff --git a/sources/ippcp/pcpmask_ct.h b/sources/ippcp/pcpmask_ct.h index 85245e23..0fa80c58 100644 --- a/sources/ippcp/pcpmask_ct.h +++ b/sources/ippcp/pcpmask_ct.h @@ -81,7 +81,7 @@ static __NOINLINE BNU_CHUNK_T cpIsMsb_ct(BNU_CHUNK_T a) #else /* replace under mask: dst[] = replaceFlag? 
src[] : dst[] */ -__INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T replaceMask) +__IPPCP_INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int len, BNU_CHUNK_T replaceMask) { BNU_CHUNK_T dstMask = ~replaceMask; int n; @@ -90,7 +90,7 @@ __INLINE void cpMaskedReplace_ct(BNU_CHUNK_T* dst, const BNU_CHUNK_T* src, int l } /* copy under mask: dst[] = src1[] & mask) ^ src2[] & ~mask */ -__INLINE void cpMaskedCopyBNU_ct(BNU_CHUNK_T* dst, BNU_CHUNK_T mask, const BNU_CHUNK_T* src1, const BNU_CHUNK_T* src2, int len) +__IPPCP_INLINE void cpMaskedCopyBNU_ct(BNU_CHUNK_T* dst, BNU_CHUNK_T mask, const BNU_CHUNK_T* src1, const BNU_CHUNK_T* src2, int len) { int i; for(i=0; i> (sizeof(a) * 8 - 1)); } @@ -110,43 +110,43 @@ __INLINE BNU_CHUNK_T cpIsMsb_ct(BNU_CHUNK_T a) #endif /* tests if LSB(a)==1 */ -__INLINE BNU_CHUNK_T cpIsLsb_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsLsb_ct(BNU_CHUNK_T a) { return (BNU_CHUNK_T)0 - (a & 1); } /* tests if a is odd */ -__INLINE BNU_CHUNK_T cpIsOdd_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsOdd_ct(BNU_CHUNK_T a) { return cpIsLsb_ct(a); } /* tests if a is even */ -__INLINE BNU_CHUNK_T cpIsEven_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsEven_ct(BNU_CHUNK_T a) { return ~cpIsLsb_ct(a); } /* tests if a==0 */ -__INLINE BNU_CHUNK_T cpIsZero_ct(BNU_CHUNK_T a) +__IPPCP_INLINE BNU_CHUNK_T cpIsZero_ct(BNU_CHUNK_T a) { return cpIsMsb_ct(~a & (a - 1)); } /* tests if a==b */ -__INLINE BNU_CHUNK_T cpIsEqu_ct(BNU_CHUNK_T a, BNU_CHUNK_T b) +__IPPCP_INLINE BNU_CHUNK_T cpIsEqu_ct(BNU_CHUNK_T a, BNU_CHUNK_T b) { return cpIsZero_ct(a ^ b); } /* test if ared(pR, pProduct, pModEngine); } -__INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, gsModEngine* pModEngine) @@ -79,7 +79,7 @@ __INLINE void cpMontMul_BNU(BNU_CHUNK_T* pR, MOD_METHOD( pModEngine )->mul(pR, pA, pB, pModEngine); } -__INLINE 
cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, const BNU_CHUNK_T* pB, cpSize nsB, gsModEngine* pModEngine) @@ -100,14 +100,14 @@ __INLINE cpSize cpMontMul_BNU_EX(BNU_CHUNK_T* pR, return nsM; } -__INLINE void cpMontSqr_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontSqr_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, gsModEngine* pModEngine) { MOD_METHOD( pModEngine )->sqr(pR, pA, pModEngine); } -__INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize nsA, gsModEngine* pModEngine) { @@ -120,7 +120,7 @@ __INLINE void cpMontSqr_BNU_EX(BNU_CHUNK_T* pR, /* // Montgomery encoding/decoding */ -__INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, gsModEngine* pModEngine) { @@ -132,7 +132,7 @@ __INLINE cpSize cpMontEnc_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, cpSize nsX, gsModEngine* pModEngine) { @@ -147,7 +147,7 @@ __INLINE cpSize cpMontEnc_BNU_EX(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXmont, cpSize nsX, gsModEngine* pModEngine) { @@ -161,7 +161,7 @@ __INLINE cpSize cpMontDec_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE void cpMontMul_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontMul_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, const IppsBigNumState* pYbn, gsModEngine* pModEngine) @@ -176,7 +176,7 @@ __INLINE void cpMontMul_BN(IppsBigNumState* pRbn, BN_SIGN(pRbn) = ippBigNumPOS; } -__INLINE void cpMontEnc_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontEnc_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pModEngine) { @@ -188,7 +188,7 @@ __INLINE void 
cpMontEnc_BN(IppsBigNumState* pRbn, BN_SIGN(pRbn) = ippBigNumPOS; } -__INLINE void cpMontDec_BN(IppsBigNumState* pRbn, +__IPPCP_INLINE void cpMontDec_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pModEngine) { @@ -207,7 +207,7 @@ __INLINE void cpMontDec_BN(IppsBigNumState* pRbn, #define cpMontExpBin_BNU_sscm OWNAPI(cpMontExpBin_BNU_sscm) IPP_OWN_DECL (cpSize, cpMontExpBin_BNU_sscm, (BNU_CHUNK_T* pY, const BNU_CHUNK_T* pX, cpSize nsX, const BNU_CHUNK_T* pE, cpSize nsE, gsModEngine* pModEngine)) -__INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, +__IPPCP_INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, const IppsBigNumState* pXbn, const IppsBigNumState* pEbn, gsModEngine* pMont) @@ -223,7 +223,7 @@ __INLINE void cpMontExpBin_BN_sscm(IppsBigNumState* pYbn, BN_SIGN(pYbn) = ippBigNumPOS; } -__INLINE void cpMontExpBin_BN(IppsBigNumState* pYbn, +__IPPCP_INLINE void cpMontExpBin_BN(IppsBigNumState* pYbn, const IppsBigNumState* pXbn, const IppsBigNumState* pEbn, gsModEngine* pModEngine) diff --git a/sources/ippcp/pcpmontred.h b/sources/ippcp/pcpmontred.h index 74ebef75..2ef13268 100644 --- a/sources/ippcp/pcpmontred.h +++ b/sources/ippcp/pcpmontred.h @@ -14,10 +14,10 @@ * limitations under the License. 
*************************************************************************/ -/* +/* // Intel(R) Integrated Performance Primitives // Cryptographic Primitives (ippcp) -// +// */ #if !defined(_CP_MONTRED_H) #define _CP_MONTRED_H @@ -34,7 +34,7 @@ #define cpMontRedAdx_BNU OWNAPI(cpMontRedAdx_BNU) IPP_OWN_DECL (void, cpMontRedAdx_BNU, (BNU_CHUNK_T* pR, BNU_CHUNK_T* pProduct, const BNU_CHUNK_T* pModulus, cpSize nsM, BNU_CHUNK_T m0)) -__INLINE void cpMontRed_BNU_opt(BNU_CHUNK_T* pR, +__IPPCP_INLINE void cpMontRed_BNU_opt(BNU_CHUNK_T* pR, BNU_CHUNK_T* pProduct, const BNU_CHUNK_T* pModulus, cpSize nsM, BNU_CHUNK_T m0) { diff --git a/sources/ippcp/pcpngmontexpstuff.h b/sources/ippcp/pcpngmontexpstuff.h index c2943ab9..1c924d86 100644 --- a/sources/ippcp/pcpngmontexpstuff.h +++ b/sources/ippcp/pcpngmontexpstuff.h @@ -14,14 +14,14 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Internal Definitions and // Internal ng RSA Function Prototypes -// -// +// +// */ #if !defined(_CP_NG_MONT_EXP_STUFF_H) @@ -35,7 +35,7 @@ /* // optimal size of fixed window exponentiation */ -__INLINE cpSize gsMontExp_WinSize(cpSize bitsize) +__IPPCP_INLINE cpSize gsMontExp_WinSize(cpSize bitsize) { #if defined(_USE_WINDOW_EXP_) // new computations @@ -56,7 +56,7 @@ __INLINE cpSize gsMontExp_WinSize(cpSize bitsize) /* // Montgomery encoding/decoding */ -__INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXreg, cpSize nsX, const gsModEngine* pMont) { @@ -66,7 +66,7 @@ __INLINE cpSize gsMontEnc_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, +__IPPCP_INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pXmont, gsModEngine* pMont) { @@ -75,7 +75,7 @@ __INLINE cpSize gsMontDec_BNU(BNU_CHUNK_T* pR, return nsM; } -__INLINE void gsMontEnc_BN(IppsBigNumState* pRbn, 
+__IPPCP_INLINE void gsMontEnc_BN(IppsBigNumState* pRbn, const IppsBigNumState* pXbn, gsModEngine* pMont) { diff --git a/sources/ippcp/pcpngmontexpstuff_avx2.c b/sources/ippcp/pcpngmontexpstuff_avx2.c index ddc88bcd..6fb10cdd 100644 --- a/sources/ippcp/pcpngmontexpstuff_avx2.c +++ b/sources/ippcp/pcpngmontexpstuff_avx2.c @@ -99,7 +99,7 @@ static int dig27_regular(Ipp32u* pRegular, int regLen, const Ipp64u* pRep27, int } /* mont_mul wrapper */ -__INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) +__IPPCP_INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) { if(mLen==38) /* corresponds to 1024-bit regular representation */ cpMontMul1024_avx2(pR, pA, pB, pModulus, mLen, k0); @@ -115,7 +115,7 @@ __INLINE void cpMontMul_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pB, con } /* mont_sqr wrapper */ -__INLINE void cpMontSqr_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) +__IPPCP_INLINE void cpMontSqr_avx2(Ipp64u* pR, const Ipp64u* pA, const Ipp64u* pModulus, int mLen, Ipp64u k0, Ipp64u* pBuffer) { if(mLen==38) /* corresponds to 1024-bit regular representation */ cpMontSqr1024_avx2(pR, pA, pModulus, mLen, k0, pBuffer); @@ -384,7 +384,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpBin_BNU_sscm_avx2, (BNU_CHUNK_T* dataY, const BNU // "fast" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and ippsRSA_InitPublicKey). 
Buffer "redE" assigned for copy of dataE, is 1 (64-bit) chunk longer than size of RSA modulus, @@ -538,7 +538,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpWin_BNU_avx2, (BNU_CHUNK_T* dataY, const BNU_CHUN // "safe" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and ippsRSA_InitPublicKey). Buffer "redE" assigned for copy of dataE, is 1 (64-bit) chunk longer than size of RSA modulus, @@ -943,7 +943,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpWin_BNU_avx512, (BNU_CHUNK_T* dataY, const BNU_CH // "safe" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1<0; strLen--) { @@ -109,7 +109,7 @@ static void regular_dig52(Ipp64u* out, int outLen /* in qwords */, const Ipp64u* converts "redundant" (base = 2^DIGIT_SIZE) representation into regular (base = 2^64) */ -__INLINE void putDig52(Ipp8u* pStr, int strLen, Ipp64u digit) +__IPPCP_INLINE void putDig52(Ipp8u* pStr, int strLen, Ipp64u digit) { for(; strLen>0; strLen--) { *pStr++ = (Ipp8u)(digit&0xFF); diff --git a/sources/ippcp/pcpngmontexpstuff_sse2.c b/sources/ippcp/pcpngmontexpstuff_sse2.c index 36bfb895..c53b3fa1 100644 --- a/sources/ippcp/pcpngmontexpstuff_sse2.c +++ b/sources/ippcp/pcpngmontexpstuff_sse2.c @@ -100,7 +100,7 @@ static int dig27_regular(Ipp32u* pRegular, int regLen, const Ipp64u* pRep27, int /* normalize "redundant" representation (pUnorm, len) into (pNorm, len) - and returns extansion + and returns extension */ static Ipp64u cpDigit27_normalize(Ipp64u* pNorm, const Ipp64u* pUnorm, int len) { @@ -643,7 +643,7 @@ IPP_OWN_DEFN (cpSize, gsMontExpBin_BNU_sscm_sse2, (BNU_CHUNK_T* dataY, const BNU // "fast" fixed-size window montgomery exponentiation // // scratch buffer structure: -// precomuted table of multipliers[(1< bitSizeE > 0), it is checked in initialization phase by (ippsRSA_GetSizePublickey() and 
ippsRSA_InitPublicKey). Buffer "dataEE" assigned for copy of dataExp, is 1 (64-bit) chunk longer than size of RSA modulus, diff --git a/sources/ippcp/pcpngmontexpstuff_win_sscm.c b/sources/ippcp/pcpngmontexpstuff_win_sscm.c index 4ab78311..d1b25966 100644 --- a/sources/ippcp/pcpngmontexpstuff_win_sscm.c +++ b/sources/ippcp/pcpngmontexpstuff_win_sscm.c @@ -14,8 +14,8 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Modular Exponentiation (windowed "safe" version) @@ -36,7 +36,7 @@ // - possible inplace mode // // scratch buffer structure: -// precomuted table of multipliers[(1<=_IPP_G9) || (_IPP32E>=_IPP32E_E9)) -__INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) +__IPPCP_INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) { #define LOCAL_COUNTER (8) int n; @@ -54,7 +54,7 @@ __INLINE int cpRand_hw_sample(BNU_CHUNK_T* pSample) } #if (_IPP32E>=_IPP32E_E9) -__INLINE int cpRand_hw_sample32(Ipp32u* pSample) +__IPPCP_INLINE int cpRand_hw_sample32(Ipp32u* pSample) { #define LOCAL_COUNTER (8) int n; @@ -81,7 +81,7 @@ __INLINE int cpRand_hw_sample32(Ipp32u* pSample) // bufLen buffer length *F*/ -__INLINE int cpRandHW_buffer(Ipp32u* pBuffer, int bufLen) +__IPPCP_INLINE int cpRandHW_buffer(Ipp32u* pBuffer, int bufLen) { int nSamples = bufLen/((Ipp32s)(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u))); diff --git a/sources/ippcp/pcprij128safe.h b/sources/ippcp/pcprij128safe.h index 5af882a4..91a7aba6 100644 --- a/sources/ippcp/pcprij128safe.h +++ b/sources/ippcp/pcprij128safe.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal Safe Rijndael Encrypt, Decrypt -// -// +// +// */ #if !defined(_PCP_RIJ_SAFE_H) @@ -54,7 +54,7 @@ IPP_OWN_DECL (void, TransformComposite2Native, (Ipp8u out[16], const Ipp8u inp[16])) /* add round key operation */ -__INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[16]) +__IPPCP_INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[16]) { ((Ipp64u*)out)[0] = ((Ipp64u*)inp)[0] ^ ((Ipp64u*)rkey)[0]; ((Ipp64u*)out)[1] = ((Ipp64u*)inp)[1] ^ ((Ipp64u*)rkey)[1]; @@ -63,7 +63,7 @@ __INLINE void AddRoundKey(Ipp8u out[16], const Ipp8u inp[16], const Ipp8u rkey[1 /* add logs of GF(2^4) elements // the exp table has been build matched for that implementation */ -__INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) +__IPPCP_INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) { //Ipp8u s = loga+logb; //return (s>2*14)? 15 : (s>14)? s-15 : s; @@ -77,7 +77,7 @@ __INLINE Ipp8u AddLogGF16(Ipp8u loga, Ipp8u logb) #define SELECTION_BITS ((sizeof(BNU_CHUNK_T)/sizeof(Ipp8u)) -1) -__INLINE Ipp8u getSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const Ipp8u* SboxEntry = RijEncSbox; diff --git a/sources/ippcp/pcprij128safe2.h b/sources/ippcp/pcprij128safe2.h index 9f893730..2b90b02a 100644 --- a/sources/ippcp/pcprij128safe2.h +++ b/sources/ippcp/pcprij128safe2.h @@ -14,13 +14,13 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Internal Safe Rijndael Encrypt, Decrypt -// -// +// +// */ #if !defined(_PCP_RIJ_SAFE2_H) @@ -48,7 +48,7 @@ (out)[11] = (inp)[14]; \ (out)[15] = (inp)[15] -__INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) +__IPPCP_INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) { state[0] ^= RoundKey[0]; state[1] ^= RoundKey[1]; @@ -57,13 +57,13 @@ __INLINE void XorRoundKey(Ipp32u* state, const Ipp32u* RoundKey) } // xtime is a macro that finds the product of {02} and the argument to xtime modulo {1b} -__INLINE Ipp32u mask4(Ipp32u x) +__IPPCP_INLINE Ipp32u mask4(Ipp32u x) { x &= 0x80808080; return (Ipp32u)((x<<1) - (x>>7)); } -__INLINE Ipp32u xtime4(Ipp32u x) +__IPPCP_INLINE Ipp32u xtime4(Ipp32u x) { Ipp32u t = (x+x) &0xFEFEFEFE; t ^= mask4(x) & 0x1B1B1B1B; diff --git a/sources/ippcp/pcprij128safedec2pxca.c b/sources/ippcp/pcprij128safedec2pxca.c index 8fc6148c..bf49d814 100644 --- a/sources/ippcp/pcprij128safedec2pxca.c +++ b/sources/ippcp/pcprij128safedec2pxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Decrypt 128-bit data block according to Rijndael // (compact S-box based implementation) -// +// // Contents: // Safe2Decrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -41,7 +41,7 @@ #define SELECTION_BITS ((sizeof(BNU_CHUNK_T)/sizeof(Ipp8u)) -1) #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) -__INLINE Ipp8u getInvSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getInvSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const BNU_CHUNK_T* SboxEntry = (BNU_CHUNK_T*)RijDecSbox; @@ -58,7 +58,7 @@ __INLINE Ipp8u getInvSboxValue(Ipp8u x) #else #include "pcpmask_ct.h" -__INLINE Ipp8u getInvSboxValue(Ipp8u x) +__IPPCP_INLINE Ipp8u getInvSboxValue(Ipp8u x) { BNU_CHUNK_T selection = 0; const BNU_CHUNK_T* SboxEntry = (BNU_CHUNK_T*)RijDecSbox; @@ -74,21 +74,21 @@ __INLINE Ipp8u getInvSboxValue(Ipp8u x) } #endif -__INLINE void invSubBytes(Ipp8u state[]) +__IPPCP_INLINE void invSubBytes(Ipp8u state[]) { int i; for(i=0;i<16;i++) state[i] = getInvSboxValue(state[i]); } -__INLINE void invShiftRows(Ipp32u* state) +__IPPCP_INLINE void invShiftRows(Ipp32u* state) { state[1] = ROR32(state[1], 24); state[2] = ROR32(state[2], 16); state[3] = ROR32(state[3], 8); } -__INLINE void invMixColumns(Ipp32u* state) +__IPPCP_INLINE void invMixColumns(Ipp32u* state) { Ipp32u y0 = state[1] ^ state[2] ^ state[3]; Ipp32u y1 = state[0] ^ state[2] ^ state[3]; diff --git a/sources/ippcp/pcprij128safeenc2pxca.c b/sources/ippcp/pcprij128safeenc2pxca.c index 73b57c4b..ef083f7d 100644 --- a/sources/ippcp/pcprij128safeenc2pxca.c +++ b/sources/ippcp/pcprij128safeenc2pxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. 
// Encrypt 128-bit data block according to Rijndael // (compact S-box based implementation) -// +// // Contents: // Safe2Encrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -37,7 +37,7 @@ #include "pcprij128safe2.h" #include "pcprijtables.h" -__INLINE void SubBytes(Ipp8u state[]) +__IPPCP_INLINE void SubBytes(Ipp8u state[]) { int i; for(i=0;i<16;i++) { @@ -46,7 +46,7 @@ __INLINE void SubBytes(Ipp8u state[]) } -__INLINE void ShiftRows(Ipp32u* state) +__IPPCP_INLINE void ShiftRows(Ipp32u* state) { state[1] = ROR32(state[1], 8); state[2] = ROR32(state[2], 16); @@ -54,7 +54,7 @@ __INLINE void ShiftRows(Ipp32u* state) } // MixColumns4 function mixes the columns of the state matrix -__INLINE void MixColumns(Ipp32u* state) +__IPPCP_INLINE void MixColumns(Ipp32u* state) { Ipp32u y0 = state[1] ^ state[2] ^ state[3]; Ipp32u y1 = state[0] ^ state[2] ^ state[3]; diff --git a/sources/ippcp/pcprij128safeencpxca.c b/sources/ippcp/pcprij128safeencpxca.c index 0b1191b2..abaae489 100644 --- a/sources/ippcp/pcprij128safeencpxca.c +++ b/sources/ippcp/pcprij128safeencpxca.c @@ -14,17 +14,17 @@ * limitations under the License. *************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Encrypt 128-bit data block according to Rijndael // (It's the special free from Sbox/tables implementation) -// +// // Contents: // SafeEncrypt_RIJ128() -// -// +// +// */ #include "owncp.h" @@ -261,7 +261,7 @@ static void FwdSubByte(Ipp8u blk[16]) /* inplace ShifttRows operation */ /* int ShiftRowsInx[] = {0,5,10,15, 4,9,14,3, 8,13,2,7, 12,1,6,11}; */ -__INLINE void FwdShiftRows(Ipp8u blk[16]) +__IPPCP_INLINE void FwdShiftRows(Ipp8u blk[16]) { Ipp8u x = blk[1]; blk[1] = blk[5]; diff --git a/sources/ippcp/pcprijkeysca.c b/sources/ippcp/pcprijkeysca.c index 31487e1e..10daf549 100644 --- a/sources/ippcp/pcprijkeysca.c +++ b/sources/ippcp/pcprijkeysca.c @@ -14,17 +14,17 @@ * limitations under the License. 
*************************************************************************/ -/* -// +/* +// // Purpose: // Cryptography Primitive. // Initialization of Rijndael -// +// // Contents: // EncRijndaelKeys() // DecRijndaelKeys() -// -// +// +// */ #include "owndefs.h" @@ -143,7 +143,7 @@ static const Ipp32u RconTbl[] = { /// commented due to mitigation // -///* precomputed table for InvMixColumn() operation */ +///* precomputed table for InvMixColumn() operation */ //static const Ipp32u InvMixCol_Tbl[4][256] = { // { LINE(inv_t0) }, // { LINE(inv_t1) }, @@ -157,7 +157,7 @@ static const Ipp32u RconTbl[] = { // ^(tbl)[2][ EBYTE((x),2) ] \ // ^(tbl)[3][ EBYTE((x),3) ] ) -__INLINE Ipp32u InvMixColumn(Ipp32u x) +__IPPCP_INLINE Ipp32u InvMixColumn(Ipp32u x) { Ipp32u x_mul_2 = xtime4(x); Ipp32u x_mul_4 = xtime4(x_mul_2); @@ -193,7 +193,7 @@ IPP_OWN_DEFN (void, ExpandRijndaelKey, (const Ipp8u* pKey, int NK, int NB, int N Ipp32u k3 = enc_keys[3]; for(n=NK128; n> 8) & 0xFF) <<8); @@ -107,12 +107,12 @@ __INLINE Ipp32u cpSboxT_SMS4(Ipp32u x) - linear Linear - mixer Mix (permutation T in the SMS4 standard phraseology) */ -__INLINE Ipp32u cpExpKeyLinear_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpExpKeyLinear_SMS4(Ipp32u x) { return x^ROL32(x,13)^ROL32(x,23); } -__INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) { return cpExpKeyLinear_SMS4( cpSboxT_SMS4(x) ); } @@ -121,12 +121,12 @@ __INLINE Ipp32u cpExpKeyMix_SMS4(Ipp32u x) - linear Linear - mixer Mix (permutation T in the SMS4 standard phraseology) */ -__INLINE Ipp32u cpCipherLinear_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpCipherLinear_SMS4(Ipp32u x) { return x^ROL32(x,2)^ROL32(x,10)^ROL32(x,18)^ROL32(x,24); } -__INLINE Ipp32u cpCipherMix_SMS4(Ipp32u x) +__IPPCP_INLINE Ipp32u cpCipherMix_SMS4(Ipp32u x) { return cpCipherLinear_SMS4( cpSboxT_SMS4(x) ); } diff --git a/sources/ippcp/pcpsms4_ccmstart.c b/sources/ippcp/pcpsms4_ccmstart.c index c1da3930..8b3fcc74 100644 --- 
a/sources/ippcp/pcpsms4_ccmstart.c +++ b/sources/ippcp/pcpsms4_ccmstart.c @@ -18,7 +18,7 @@ // Purpose: // Cryptography Primitive. // SMS4-CCM implementation. -// +// // Content: // ippsSMS4_CCMStart() // @@ -32,7 +32,7 @@ /*F* // Name: ippsSMS4_CCMStart // -// Purpose: Start the process (encryption+generation) or (decryption+veryfication). +// Purpose: Start the process (encryption+generation) or (decryption+verification). // // Returns: Reason: // ippStsNullPtrErr pCtx == NULL diff --git a/sources/ippcp/pcpsms4_ctr_gfni.c b/sources/ippcp/pcpsms4_ctr_gfni.c index bc41f31a..e1520f17 100644 --- a/sources/ippcp/pcpsms4_ctr_gfni.c +++ b/sources/ippcp/pcpsms4_ctr_gfni.c @@ -42,12 +42,12 @@ #include "pcpsms4_gfni.h" -static __ALIGN32 Ipp8u endiannes_swap[] = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, +static __ALIGN32 Ipp8u endianness_swap[] = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static __ALIGN32 Ipp8u endiannes[] = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, +static __ALIGN32 Ipp8u endianness[] = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; @@ -64,7 +64,7 @@ static __ALIGN16 Ipp8u next_inc[] = {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, static __ALIGN16 Ipp8u one128[] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; -__INLINE __m512i inc512(__m512i x, Ipp8u* increment) +__IPPCP_INLINE __m512i inc512(__m512i x, Ipp8u* increment) { __m512i t = _mm512_add_epi64(x, M512(increment)); __mmask8 carryMask = _mm512_cmplt_epu64_mask(t, x); @@ -74,7 +74,7 @@ __INLINE __m512i inc512(__m512i x, Ipp8u* increment) return t; } -__INLINE __m128i inc128(__m128i x) +__IPPCP_INLINE __m128i inc128(__m128i x) { __m128i t = _mm_add_epi64(x, M128(one128)); x = _mm_cmpeq_epi64(t, _mm_setzero_si128()); @@ -88,9 +88,9 @@ static int cpSMS4_CTR_gfni512x32(Ipp8u* 
pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); static int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); -static +static int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); -static +static int cpSMS4_CTR_gfni128x8(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); static int cpSMS4_ECB_gfni128x4(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr); @@ -113,94 +113,94 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, // TMP[22] - ctrUnch TMP[20] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[21] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[21] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endiannes)); + TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endianness)); /* read string mask and convert to numerical */ - TMP[21] = _mm512_shuffle_epi8(TMP[21], M512(endiannes)); + TMP[21] = _mm512_shuffle_epi8(TMP[21], M512(endianness)); /* upchanged counter bits */ TMP[22] = _mm512_andnot_si512(TMP[21], TMP[20]); - + /* first incremention */ TMP[20] = inc512(TMP[20], first_inc); - + TMP[20] = _mm512_and_si512(TMP[21], TMP[20]); for (n = 0; n < processedLen; n += (64 * MBS_SMS4), pInp += (64 * MBS_SMS4), pOut += (64 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = 
_mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = 
_mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[12], TMP[13], TMP[14], TMP[15], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[20]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[20] = inc512(TMP[3], next_inc); + TMP[20] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[0], TMP[21])); TMP[1] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[1], TMP[21])); TMP[2] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[2], TMP[21])); TMP[3] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[3], TMP[21])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[16], TMP[17], TMP[18], TMP[19], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ 
TMP[3] = TMP[2] = TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -303,7 +303,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, TMP[19] = _mm512_xor_si512(_mm512_xor_si512(TMP[19], TMP[3]), L512(TMP[3])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -350,7 +350,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, /* Save counter */ TMP[20] = _mm512_xor_si512(TMP[22], _mm512_and_si512(TMP[20], TMP[21])); - TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endiannes)); + TMP[20] = _mm512_shuffle_epi8(TMP[20], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[20])); /* clear secret data */ @@ -359,7 +359,7 @@ IPP_OWN_DEFN (int, cpSMS4_CTR_gfni512, (Ipp8u* pOut, const Ipp8u* pInp, int len, } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x48(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -386,76 +386,76 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[18] - ctrUnch TMP[16] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[17] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[17] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endiannes)); + TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endianness)); /* read string mask and convert to numerical */ - TMP[17] = _mm512_shuffle_epi8(TMP[17], M512(endiannes)); + TMP[17] = _mm512_shuffle_epi8(TMP[17], M512(endianness)); /* upchanged counter bits */ TMP[18] = _mm512_andnot_si512(TMP[17], TMP[16]); - + /* first incremention */ TMP[16] = inc512(TMP[16], first_inc); - + TMP[16] = _mm512_and_si512(TMP[17], TMP[16]); for (n = 0; n < processedLen; n += (48 * MBS_SMS4), pInp += (48 * MBS_SMS4), pOut += (48 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[16]; TMP[1] = 
inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[16]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = 
_mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); TMP[0] = TMP[16]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[16] = inc512(TMP[3], next_inc); + TMP[16] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[0], TMP[17])); TMP[1] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[1], TMP[17])); TMP[2] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[2], TMP[17])); TMP[3] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[3], TMP[17])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[12], TMP[13], TMP[14], TMP[15], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ TMP[2] = TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -538,7 +538,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[15] = _mm512_xor_si512(_mm512_xor_si512(TMP[15], TMP[2]), L512(TMP[2])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -575,7 +575,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[16] = _mm512_xor_si512(TMP[18], _mm512_and_si512(TMP[16], TMP[17])); - TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endiannes)); + TMP[16] = _mm512_shuffle_epi8(TMP[16], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[16])); /* 
clear secret data */ @@ -584,7 +584,7 @@ int cpSMS4_CTR_gfni512x48(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x32(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -611,59 +611,59 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[14] - ctrUnch TMP[12] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[13] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[13] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endiannes)); + TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endianness)); /* read string mask and convert to numerical */ - TMP[13] = _mm512_shuffle_epi8(TMP[13], M512(endiannes)); + TMP[13] = _mm512_shuffle_epi8(TMP[13], M512(endianness)); /* upchanged counter bits */ TMP[14] = _mm512_andnot_si512(TMP[13], TMP[12]); - + /* first incremention */ TMP[12] = inc512(TMP[12], first_inc); - + TMP[12] = _mm512_and_si512(TMP[13], TMP[12]); for (n = 0; n < processedLen; n += (32 * MBS_SMS4), pInp += (32 * MBS_SMS4), pOut += (32 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[12]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[12] = inc512(TMP[3], next_inc); + TMP[12] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[0], TMP[13])); TMP[1] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[1], TMP[13])); TMP[2] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[2], TMP[13])); TMP[3] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[3], TMP[13])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = 
_mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); - + TMP[0] = TMP[12]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[12] = inc512(TMP[3], next_inc); + TMP[12] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[0], TMP[13])); TMP[1] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[1], TMP[13])); TMP[2] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[2], TMP[13])); TMP[3] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[3], TMP[13])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[8], TMP[9], TMP[10], TMP[11], TMP[0], TMP[1], TMP[2], TMP[3]); - + for (itr = 0; itr < 8; itr++, pRKey += 4) { /* initial xors */ TMP[1] = TMP[0] = _mm512_set1_epi32((Ipp32s)pRKey[0]); @@ -726,7 +726,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[11] = _mm512_xor_si512(_mm512_xor_si512(TMP[11], TMP[1]), L512(TMP[1])); } - + pRKey -= 32; TRANSPOSE_OUT_512(TMP[0], TMP[1], TMP[2], TMP[3], TMP[4], TMP[5], TMP[6], TMP[7]); @@ -753,7 +753,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[12] = _mm512_xor_si512(TMP[14], _mm512_and_si512(TMP[12], TMP[13])); - 
TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endiannes)); + TMP[12] = _mm512_shuffle_epi8(TMP[12], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[12])); /* clear secret data */ @@ -762,7 +762,7 @@ int cpSMS4_CTR_gfni512x32(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni512x16(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -789,40 +789,40 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // TMP[10] - ctrUnch TMP[8] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtr)); - TMP[9] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); + TMP[9] = _mm512_broadcast_i64x2(_mm_loadu_si128((__m128i*)pCtrMask)); /* read string counter and convert to numerical */ - TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endiannes)); + TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endianness)); /* read string mask and convert to numerical */ - TMP[9] = _mm512_shuffle_epi8(TMP[9], M512(endiannes)); + TMP[9] = _mm512_shuffle_epi8(TMP[9], M512(endianness)); /* upchanged counter bits */ TMP[10] = _mm512_andnot_si512(TMP[9], TMP[8]); - + /* first incremention */ TMP[8] = inc512(TMP[8], first_inc); - + TMP[8] = _mm512_and_si512(TMP[9], TMP[8]); for (n = 0; n < processedLen; n += (16 * MBS_SMS4), pInp += (16 * MBS_SMS4), pOut += (16 * MBS_SMS4)) { - int itr; + int itr; TMP[0] = TMP[8]; TMP[1] = inc512(TMP[0], next_inc); TMP[2] = inc512(TMP[1], next_inc); TMP[3] = inc512(TMP[2], next_inc); - TMP[8] = inc512(TMP[3], next_inc); + TMP[8] = inc512(TMP[3], next_inc); TMP[0] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[0], TMP[9])); TMP[1] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[1], TMP[9])); TMP[2] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[2], TMP[9])); TMP[3] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[3], TMP[9])); - TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endiannes_swap)); - TMP[1] = _mm512_shuffle_epi8(TMP[1], 
M512(endiannes_swap)); - TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endiannes_swap)); - TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endiannes_swap)); + TMP[0] = _mm512_shuffle_epi8(TMP[0], M512(endianness_swap)); + TMP[1] = _mm512_shuffle_epi8(TMP[1], M512(endianness_swap)); + TMP[2] = _mm512_shuffle_epi8(TMP[2], M512(endianness_swap)); + TMP[3] = _mm512_shuffle_epi8(TMP[3], M512(endianness_swap)); TRANSPOSE_INP_512(TMP[4], TMP[5], TMP[6], TMP[7], TMP[0], TMP[1], TMP[2], TMP[3]); for (itr = 0; itr < 8; itr++, pRKey += 4) { @@ -883,7 +883,7 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* /* Save counter */ TMP[8] = _mm512_xor_si512(TMP[10], _mm512_and_si512(TMP[8], TMP[9])); - TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endiannes)); + TMP[8] = _mm512_shuffle_epi8(TMP[8], M512(endianness)); _mm_storeu_si128((__m128i*)pCtr, _mm512_castsi512_si128(TMP[8])); /* clear secret data */ @@ -892,7 +892,7 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* } } - + len -= processedLen; if (len) processedLen += cpSMS4_CTR_gfni128x12(pOut, pInp, len, pRKey, pCtrMask, pCtr); @@ -904,14 +904,14 @@ int cpSMS4_CTR_gfni512x16(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* // 12*MBS_SMS4 processing */ -static +static int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* pRKey, const Ipp8u* pCtrMask, Ipp8u* pCtr) { int processedLen = len - (len % (12 * MBS_SMS4)); int n; if(processedLen){ - + __ALIGN16 __m128i TMP[22]; // TMP[15] - ctr @@ -921,8 +921,8 @@ int cpSMS4_CTR_gfni128x12(Ipp8u* pOut, const Ipp8u* pInp, int len, const Ipp32u* TMP[15] = _mm_loadu_si128((__m128i*)pCtr); TMP[16] = _mm_loadu_si128((__m128i*)pCtrMask); - TMP[16] = _mm_shuffle_epi8(TMP[16], M128(endiannes)); - TMP[15] = _mm_shuffle_epi8(TMP[15], M128(endiannes)); + TMP[16] = _mm_shuffle_epi8(TMP[16], M128(endianness)); + TMP[15] = _mm_shuffle_epi8(TMP[15], M128(endianness)); TMP[17] = _mm_andnot_si128(TMP[16], TMP[15]); 
for(n=0; n=_IPP_W7) || (_IPP32E>=_IPP32E_M7)) -__INLINE void PurgeBlock(void* pDst, int len) +__IPPCP_INLINE void PurgeBlock(void* pDst, int len) { int n; for(n=0; n> (blkBitSize -numSize)%8 ); @@ -193,7 +193,7 @@ __INLINE void StdIncrement(Ipp8u* pCounter, int blkBitSize, int numSize) } /* vb */ -__INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -247,7 +247,7 @@ __INLINE void ompStdIncrement64( void* pInitCtrVal, void* pCurrCtrVal, /* vb */ -__INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -342,7 +342,7 @@ __INLINE void ompStdIncrement128( void* pInitCtrVal, void* pCurrCtrVal, #if 0 /* vb */ -__INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; @@ -468,7 +468,7 @@ __INLINE void ompStdIncrement192( void* pInitCtrVal, void* pCurrCtrVal, #if 0 /* vb */ -__INLINE void ompStdIncrement256( void* pInitCtrVal, void* pCurrCtrVal, +__IPPCP_INLINE void ompStdIncrement256( void* pInitCtrVal, void* pCurrCtrVal, int ctrNumBitSize, int n ) { int k; diff --git a/sources/ippcp/pcpver.h b/sources/ippcp/pcpver.h index 0f3dc9c1..5ed5979d 100644 --- a/sources/ippcp/pcpver.h +++ b/sources/ippcp/pcpver.h @@ -26,5 +26,6 @@ #include "ippver.h" #define BUILD() 1043 #define VERSION() BASE_VERSION(),BUILD() +#define STR_FILE_VERSION() STR_BASE_VERSION() "," STR(BUILD()) /* ////////////////////////// End of file "pcpver.h" ///////////////////////// */ diff --git a/sources/ippcp/sm2/ifma_arith_nsm2.c b/sources/ippcp/sm2/ifma_arith_nsm2.c index be0fa375..d6ba6a63 100644 --- a/sources/ippcp/sm2/ifma_arith_nsm2.c +++ b/sources/ippcp/sm2/ifma_arith_nsm2.c @@ -183,12 +183,12 @@ 
IPP_OWN_DEFN(fesm2, fesm2_from_mont_norder, (const fesm2 a)) { return r; } -__INLINE fesm2 mul_norder_norm(const fesm2 a, const fesm2 b) { +__IPPCP_INLINE fesm2 mul_norder_norm(const fesm2 a, const fesm2 b) { const fesm2 r = fesm2_mul_norder(a, b); return ifma_lnorm52(r); } -__INLINE fesm2 sqr_norder_norm(const fesm2 a) { +__IPPCP_INLINE fesm2 sqr_norder_norm(const fesm2 a) { const fesm2 r = fesm2_mul_norder(a, a); return ifma_lnorm52(r); } diff --git a/sources/ippcp/sm2/ifma_arith_psm2.c b/sources/ippcp/sm2/ifma_arith_psm2.c index 127da410..84950a63 100644 --- a/sources/ippcp/sm2/ifma_arith_psm2.c +++ b/sources/ippcp/sm2/ifma_arith_psm2.c @@ -246,12 +246,12 @@ IPP_OWN_DEFN(fesm2, fesm2_from_mont, (const fesm2 a)) { return r; } -__INLINE fesm2 fesm2_mul_norm(const fesm2 a, const fesm2 b) { +__IPPCP_INLINE fesm2 fesm2_mul_norm(const fesm2 a, const fesm2 b) { fesm2 r = fesm2_mul(a, b); return ifma_lnorm52(r); } -__INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { +__IPPCP_INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { fesm2 r = fesm2_sqr(a); return ifma_lnorm52(r); } @@ -262,7 +262,7 @@ __INLINE fesm2 fesm2_sqr_norm(const fesm2 a) { fesm2_mul_dual(&(R1), (A1), (B1), &(R2), (A2), (B2)); \ ifma_lnorm52_dual(&(R1), (R1), &(R2), (R2)); -__INLINE fesm2 fesm2_sqr_ntimes(const fesm2 a, int n) { +__IPPCP_INLINE fesm2 fesm2_sqr_ntimes(const fesm2 a, int n) { fesm2 r = a; for (; n > 0; --n) sqr(r, r); diff --git a/sources/ippcp/sm2/ifma_arith_psm2.h b/sources/ippcp/sm2/ifma_arith_psm2.h index 18dde980..f84e43a9 100644 --- a/sources/ippcp/sm2/ifma_arith_psm2.h +++ b/sources/ippcp/sm2/ifma_arith_psm2.h @@ -62,7 +62,7 @@ IPP_OWN_DECL(fesm2, fesm2_mul, (const fesm2 a, const fesm2 b)) * \param[in] a value (in radix 2^52) * \return fesm2 not normalization value */ -__INLINE IPP_OWN_DEFN(fesm2, fesm2_sqr, (const fesm2 a)) { +__IPPCP_INLINE IPP_OWN_DEFN(fesm2, fesm2_sqr, (const fesm2 a)) { return fesm2_mul(a, a); } @@ -98,7 +98,7 @@ IPP_OWN_DECL(void, fesm2_mul_dual, (fesm2 pr1[], const 
fesm2 a1, const fesm2 b1, * \param[out] pr2 ptr second value no normalization * \param[in] a2 value (in radix 2^52) */ -__INLINE IPP_OWN_DEFN(void, fesm2_sqr_dual, (fesm2 pr1[], const fesm2 a1, fesm2 pr2[], const fesm2 a2)) { +__IPPCP_INLINE IPP_OWN_DEFN(void, fesm2_sqr_dual, (fesm2 pr1[], const fesm2 a1, fesm2 pr2[], const fesm2 a2)) { fesm2_mul_dual(pr1, a1, a1, pr2, a2, a2); return; } diff --git a/sources/ippcp/sm2/ifma_defs_sm2.h b/sources/ippcp/sm2/ifma_defs_sm2.h index 3b7c29eb..a469d28d 100644 --- a/sources/ippcp/sm2/ifma_defs_sm2.h +++ b/sources/ippcp/sm2/ifma_defs_sm2.h @@ -50,7 +50,7 @@ static const __ALIGN64 Ipp64u PSM2_R[PSM2_LEN52] = { * 0xFF - is equal one * 0x00 - is no equal one */ -__INLINE mask8 sm2_is_msb(const mask8 a) { +__IPPCP_INLINE mask8 sm2_is_msb(const mask8 a) { return (mask8)((mask8)0 - (a >> 7)); } @@ -62,7 +62,7 @@ __INLINE mask8 sm2_is_msb(const mask8 a) { * 0xFF - is zero value * 0x00 - no equal zero */ -__INLINE mask8 sm2_is_zero_i64(const m512 a) { +__IPPCP_INLINE mask8 sm2_is_zero_i64(const m512 a) { const mask8 mask = cmp_i64_mask(a, setzero_i64(), _MM_CMPINT_NE); return sm2_is_msb((~mask & (mask - 1))); } diff --git a/sources/ippcp/sm2/ifma_ecpoint_sm2.c b/sources/ippcp/sm2/ifma_ecpoint_sm2.c index c809c767..f4048113 100644 --- a/sources/ippcp/sm2/ifma_ecpoint_sm2.c +++ b/sources/ippcp/sm2/ifma_ecpoint_sm2.c @@ -38,7 +38,7 @@ static const __ALIGN64 Ipp64u psm2_x8[PSM2_LEN52] = { 0x000ffffffffffff8, 0x000f800000007fff, 0x000fffffffffffff, 0x000fffffffffffff, 0x0007fffffff7ffff}; /* Mont(a) = a*r mod psm2, where r = 2^(6*52) mod psm2 */ -static const __ALIGN64 Ipp64u psm2_a[PSM2_LEN52] = { +static const __ALIGN64 Ipp64u psm2_a[PSM2_LEN52] = { 0x000ffffffcffffff, 0x000ff03000000fcf, 0x000cffffffffffff, 0x000fffffffffffff, 0x0000fcfffffeffff}; /* Mont(b) = b*r mod psm2, where r = 2^(6*52) mod psm2 */ @@ -502,7 +502,7 @@ static __NOINLINE void clear_secret_context(Ipp16u* wval, return; } -__INLINE mask8 is_eq_mask(const Ipp32s a, 
const Ipp32s b) { +__IPPCP_INLINE mask8 is_eq_mask(const Ipp32s a, const Ipp32s b) { const Ipp32s eq = a ^ b; const Ipp32s v = ~eq & (eq - 1); const Ipp32s msb = 0 - (v >> (sizeof(a) * 8 - 1)); @@ -649,7 +649,7 @@ IPP_OWN_DEFN(void, gesm2_mul, (PSM2_POINT_IFMA * r, const PSM2_POINT_IFMA* p, co #define BP_WIN_SIZE BASE_POINT_WIN_SIZE #define BP_N_ENTRY BASE_POINT_N_ENTRY -__INLINE void extract_point_affine(PSM2_AFFINE_POINT_IFMA* r, +__IPPCP_INLINE void extract_point_affine(PSM2_AFFINE_POINT_IFMA* r, const SINGLE_PSM2_AFFINE_POINT_IFMA* tbl, const Ipp32s digit) { const Ipp32s idx = digit - 1; diff --git a/sources/ippcp/sm2/ifma_ecpoint_sm2.h b/sources/ippcp/sm2/ifma_ecpoint_sm2.h index 2a9dab66..8f325659 100644 --- a/sources/ippcp/sm2/ifma_ecpoint_sm2.h +++ b/sources/ippcp/sm2/ifma_ecpoint_sm2.h @@ -134,7 +134,7 @@ IPP_OWN_DECL(void, gesm2_select_ap_w7_ifma, (BNU_CHUNK_T * pAffinePoint, const B #include "pcpgfpstuff.h" #include "pcpgfpecstuff.h" -__INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, +__IPPCP_INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, const BNU_CHUNK_T* pP, BNU_CHUNK_T* pPool, ifmaArithMethod* method, @@ -161,7 +161,7 @@ __INLINE void recode_point_to_mont52(PSM2_POINT_IFMA* pR, pR->z = p_to_mont(pR->z); } -__INLINE void recode_point_to_mont64(IppsGFpECPoint* pR, +__IPPCP_INLINE void recode_point_to_mont64(IppsGFpECPoint* pR, PSM2_POINT_IFMA* pP, BNU_CHUNK_T* pPool, ifmaArithMethod* method, diff --git a/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c b/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c index 466c57a6..a8f03c28 100644 --- a/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c +++ b/sources/ippcp/sm2/ifma_sm2_key_exchange_shared_key.c @@ -26,7 +26,7 @@ /* clang-format off */ -__INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, +__IPPCP_INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, const BNU_CHUNK_T *x, const BNU_CHUNK_T *y, ifmaArithMethod *method) /* clang-format on */ @@ 
-45,7 +45,7 @@ __INLINE void ifma_sm2_set_affine_point_radix52(PSM2_POINT_IFMA *rp, } /* clang-format off */ -__INLINE void ifma_sm2_get_affine(BNU_CHUNK_T *x, BNU_CHUNK_T *y, +__IPPCP_INLINE void ifma_sm2_get_affine(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const PSM2_POINT_IFMA* p, ifmaArithMethod* method) /* clang-format on */ @@ -115,7 +115,7 @@ IPP_OWN_DEFN(IppStatus, gfec_key_exchange_sm2_shared_key_avx512, (Ipp8u* pShared cpEcGFpReleasePool(1, pEC); IPP_BADARG_RET(!result, ippStsEphemeralKeyErr); - /* create buffer data (it needes further use compute tmp_p) + /* create buffer data (it needs further use compute tmp_p) * -> SM3( x(u/v)(0) || Za(1) || Zb(2) || xa(3) || ya(4) || xb(5) || yb(6) ) */ BNU_CHUNK_T *pDataBuff = cpGFpGetPool(7, pME); diff --git a/sources/ippcp/sm2/sm2_key_exchange_shared_key.c b/sources/ippcp/sm2/sm2_key_exchange_shared_key.c index 380cb649..a06952e9 100644 --- a/sources/ippcp/sm2/sm2_key_exchange_shared_key.c +++ b/sources/ippcp/sm2/sm2_key_exchange_shared_key.c @@ -57,7 +57,7 @@ * ippStsRangeErr - if BitSize(pEC) < IPP_SM3_DIGEST_BITSIZE * ippStsBadArgErr - if role(pKE) no equal ippKESM2Requester|ippKESM2Responder or sharedKeySize <= 0 * ippStsInvalidPrivateKey - if test is failed 0 < pPrvKey|pEphPrvKey < Order - * ippStsEphemeralKeyErr - if test is failed pEphPrvKey == pEphPublicKeySelf*G or if calculated U(V) is an + * ippStsEphemeralKeyErr - if test is failed pEphPrvKey == pEphPublicKeySelf*G or if calculated U(V) is an * infinity point, U/V = [h*t(a/b)]( P(b/a) + [x(b/a)`]R(b/a) ) = ( x(u/v), y(u/v) ) */ /* clang-format off */ @@ -148,7 +148,7 @@ IPPFUN(IppStatus, ippsGFpECKeyExchangeSM2_SharedKey, (Ipp8u* pSharedKey, int sha const int elemBytes = (elemBits + 7) / 8; /* size Bytes */ const int elemSize = GFP_FELEN(pME); /* size BNU_CHUNK */ - /* create buffer data (it needes further use compute tmp_p) + /* create buffer data (it needs further use compute tmp_p) * -> SM3( x(u/v)(0) || Za(1) || Zb(2) || xa(3) || ya(4) || xb(5) || yb(6) ) */ 
BNU_CHUNK_T *pDataBuff = cpGFpGetPool(7, pME); diff --git a/sources/ippcp/sm2/sm2_stuff.c b/sources/ippcp/sm2/sm2_stuff.c index 8eb5fc04..0da31f65 100644 --- a/sources/ippcp/sm2/sm2_stuff.c +++ b/sources/ippcp/sm2/sm2_stuff.c @@ -51,7 +51,7 @@ IPP_OWN_DEFN(IppStatus, computeZa_user_id_hash_sm2, (Ipp8u * pZa_digest, IPP_BAD_PTR2_RET(pZa_digest, p_user_id); /* check border (user_id_len > 0) | (elem_len > 0) */ IPP_BADARG_RET(!(user_id_len > 0) || !(elem_len > 0), ippStsBadArgErr); - /* check (user_id_len*8 <= 0xFFFF) ~ (user_id_len <= 0x1FFF) for two bytes overflow. + /* check (user_id_len*8 <= 0xFFFF) ~ (user_id_len <= 0x1FFF) for two bytes overflow. user_id_len*8 operation will be executed in algorithm's flow */ IPP_BADARG_RET(user_id_len > 0x1FFF, ippStsBadArgErr); /* param curve: a, b, Gx, Gy */ @@ -97,7 +97,7 @@ IPP_OWN_DEFN(IppStatus, computeZa_user_id_hash_sm2, (Ipp8u * pZa_digest, #define SIZE_CT (4) -__INLINE void convert_ct_to_big_endian(Ipp8u pCt[SIZE_CT], const Ipp32u ct) +__IPPCP_INLINE void convert_ct_to_big_endian(Ipp8u pCt[SIZE_CT], const Ipp32u ct) { pCt[0] = (Ipp8u)(ct >> 24); pCt[1] = (Ipp8u)(ct >> 16); diff --git a/sources/ippcp/sm2/sm2_stuff.h b/sources/ippcp/sm2/sm2_stuff.h index f0ab23ad..ccef5ed6 100644 --- a/sources/ippcp/sm2/sm2_stuff.h +++ b/sources/ippcp/sm2/sm2_stuff.h @@ -46,7 +46,7 @@ * @param[in out] arr array data * @param[in] len length array */ -__INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) +__IPPCP_INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) { #define SWAPXOR(x, y) \ (x) ^= (y); \ @@ -68,7 +68,7 @@ __INLINE void cpSM2KE_reverse_inplace(Ipp8u *arr, const int len) * @param[in] p point copy * @param[in] pEC context Elliptic Curve */ -__INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const IppsGFpECPoint *p, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const IppsGFpECPoint *p, const IppsGFpECState *pEC) { 
ECP_POINT_SET_ID(r); cpEcGFpInitPoint(r, data, ECP_POINT_FLAGS(p), pEC); @@ -86,7 +86,7 @@ __INLINE void cpSM2KE_CopyPointData(IppsGFpECPoint *r, BNU_CHUNK_T *data, const * @param[in] a value x * @param[in] pEC context Elliptic Curve */ -__INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const IppsGFpECState *pEC) { const gsModEngine *pME = GFP_PMA(ECP_GFP(pEC)); @@ -107,7 +107,7 @@ __INLINE void cpSM2KE_reduction_x2w(BNU_CHUNK_T *r, const BNU_CHUNK_T *a, const } /* clang-format off */ -__INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, +__IPPCP_INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECPoint *p, IppsGFpECState *pEC) /* clang-format on */ @@ -120,7 +120,7 @@ __INLINE void cpSM2KE_get_affine_ext_euclid(BNU_CHUNK_T *x, BNU_CHUNK_T *y, return; } -__INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECState *pEC) +__IPPCP_INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECState *pEC) { const gsModEngine *pME = GFP_PMA(ECP_GFP(pEC)); @@ -139,7 +139,7 @@ __INLINE void cpSM2KE_xy_to_BE(BNU_CHUNK_T *x, BNU_CHUNK_T *y, const IppsGFpECSt * @param[in] a hashing an array data * @param[in] numBytes number of bytes */ -__INLINE void cpSM2KE_compute_hash_SM3(Ipp8u *r, const Ipp8u *a, const int numBytes) +__IPPCP_INLINE void cpSM2KE_compute_hash_SM3(Ipp8u *r, const Ipp8u *a, const int numBytes) { static IppsHashState_rmf ctx; diff --git a/tools/ipp_custom_library_tool_python/gui/settings_panel.py b/tools/ipp_custom_library_tool_python/gui/settings_panel.py index 5ec6f2fa..2146e797 100644 --- a/tools/ipp_custom_library_tool_python/gui/settings_panel.py +++ b/tools/ipp_custom_library_tool_python/gui/settings_panel.py @@ -189,4 +189,4 @@ def disable_widgets(self): def get_formatted_button_name(self, button): button_name = 
button.text().replace('(R)', '') - return re.sub('[^\w-]', '', button_name.lower()) + return re.sub(r'[^\w-]', '', button_name.lower()) diff --git a/tools/ipp_custom_library_tool_python/tool/utils.py b/tools/ipp_custom_library_tool_python/tool/utils.py index 18ad9458..4067b79b 100644 --- a/tools/ipp_custom_library_tool_python/tool/utils.py +++ b/tools/ipp_custom_library_tool_python/tool/utils.py @@ -27,16 +27,16 @@ OPENMP = 'openmp' TL_TYPES = [TBB, OPENMP] -PATH_TO_PACKAGE_REGEX = '(?P.*)\Wtools\W.*' -COMPONENTS_INSTALL_DIR_REGEX = '(?P.*)\Wipp.*' -VERSION_REGEX = '.*VERSION_STR\s*(?P.*)\s*' -STR_MACROS_REGEX = '.*STR\((?P\S*)\).*' -C_STRING_REGEX = '.*(\S|^)(?P\s*".*"\s*)(\S|$).*' -C_STRING_VALUE_REGEX = '.*"(?P.*)".*' -FUNCTION_NAME_REGEX = 'IPPAPI\s*\(\s*(?P.*?)\s*,' \ - '\s*(?P\S*)\s*,' \ - '\s*\(?(?P.*?)\s*\)?\s*\)?\s*$' -ARGUMENT_REGEX = '.*\W*\w+\W*\s+\W*(?P[^\W\d]+\w*)\W*?' +PATH_TO_PACKAGE_REGEX = r'(?P.*)\Wtools\W.*' +COMPONENTS_INSTALL_DIR_REGEX = r'(?P.*)\Wipp.*' +VERSION_REGEX = r'.*VERSION_STR\s*(?P.*)\s*' +STR_MACROS_REGEX = r'.*STR\((?P\S*)\).*' +C_STRING_REGEX = r'.*(\S|^)(?P\s*".*"\s*)(\S|$).*' +C_STRING_VALUE_REGEX = r'.*"(?P.*)".*' +FUNCTION_NAME_REGEX = r'IPPAPI\s*\(\s*(?P.*?)\s*,' \ + r'\s*(?P\S*)\s*,' \ + r'\s*\(?(?P.*?)\s*\)?\s*\)?\s*$' +ARGUMENT_REGEX = r'.*\W*\w+\W*\s+\W*(?P[^\W\d]+\w*)\W*?' CUSTOM_LIBRARY_NAME = 'Custom library name' BUILD_SCRIPT_NAME = 'Build script name'