Created
February 5, 2026 13:14
-
-
Save t-mat/755a7f9f8a03d3debee80e0068f254ba to your computer and use it in GitHub Desktop.
[C++]AES implementation mimicking x86 AES-NI instructions in software
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // AES implementation mimicking x86 AES-NI instructions in software | |
| // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=aes | |
| #ifndef AESNI_GENERIC_HPP_INCLUDED | |
| # define AESNI_GENERIC_HPP_INCLUDED 1 | |
| # include <stdint.h> | |
// Software implementation of single AES rounds, mirroring the operand order
// of the x86 AES-NI instructions (destination, state, round key).
//
// Block layout used throughout: a 16-byte block is stored column-major, i.e.
// byte `i` belongs to state column `i / 4` and state row `i % 4`. The local
// `uint8_t v[4][4]` buffers are therefore indexed as v[column][row], and
// `&v[0][0]` is the block in its external byte order.
//
// NOTE: the S-box lookups are indexed by data bytes and `mul2` selects on a
// data bit, so this code makes no attempt to be constant-time.
namespace AesNiGeneric
{
namespace details
{
// ========================================================================
// Internal Helpers
// ========================================================================

/// GF(2^8) multiplication by 2 ("xtime"): shift left and, when the top bit
/// was set, reduce by XOR-ing 0x1b. The cast drops the bit shifted out.
constexpr uint8_t mul2(uint8_t p) noexcept
{
    return static_cast<uint8_t>((p << 1) ^ ((p & 0x80) ? 0x1b : 0));
}

/// GF(2^8) multiplication by 4 (two xtime steps).
constexpr uint8_t mul4(uint8_t p) noexcept
{
    return mul2(mul2(p));
}

/// GF(2^8) multiplication by 8 (three xtime steps).
constexpr uint8_t mul8(uint8_t p) noexcept
{
    return mul2(mul4(p));
}

// GF(2^8) combined multiplications for InvMixColumns. Each constant is
// decomposed into powers of two: 9 = 8+1, 11 = 8+2+1, 13 = 8+4+1, 14 = 8+4+2.

/// GF(2^8) multiplication by 9.
constexpr uint8_t mul9(uint8_t p) noexcept
{
    return static_cast<uint8_t>(mul8(p) ^ p);
}

/// GF(2^8) multiplication by 11.
constexpr uint8_t mul11(uint8_t p) noexcept
{
    return static_cast<uint8_t>(mul8(p) ^ mul2(p) ^ p);
}

/// GF(2^8) multiplication by 13.
constexpr uint8_t mul13(uint8_t p) noexcept
{
    return static_cast<uint8_t>(mul8(p) ^ mul4(p) ^ p);
}

/// GF(2^8) multiplication by 14.
constexpr uint8_t mul14(uint8_t p) noexcept
{
    return static_cast<uint8_t>(mul8(p) ^ mul4(p) ^ mul2(p));
}

/// AddRoundKey: d16[i] = s16[i] ^ rk16[i] for the 16 bytes of a block.
/// Bytes are processed in ascending order, one at a time.
inline void xor_roundkey(uint8_t *d16, const uint8_t *s16, const uint8_t *rk16) noexcept
{
    for (int i = 0; i < 16; ++i) {
        d16[i] = static_cast<uint8_t>(s16[i] ^ rk16[i]);
    }
}

/// SubBytes + ShiftRows in one pass.
/// Reads the 16-byte block `s16`, substitutes every byte through the forward
/// S-box and stores it at its shifted position in `v_out[column][row]`.
inline void sbox_cnv(uint8_t v_out[4][4], const uint8_t *s16) noexcept
{
    // AES Forward S-box
    static constexpr uint8_t sbox[256] = {
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //
        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //
        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //
        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //
        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //
        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //
        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //
        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //
        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //
        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //
        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //
        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //
        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //
        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //
        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};

    for (int col = 0; col < 4; ++col) {
        for (int row = 0; row < 4; ++row) {
            // ShiftRows: row r rotates left by r, so the byte coming from
            // source column `col` ends up in column (col - row) mod 4.
            const int dst_col = (col + 4 - row) % 4;
            v_out[dst_col][row] = sbox[s16[4 * col + row]];
        }
    }
}

/// InvSubBytes + InvShiftRows in one pass.
/// Reads the 16-byte block `s16`, substitutes every byte through the inverse
/// S-box and stores it at its un-shifted position in `v_out[column][row]`.
inline void inv_sbox_cnv(uint8_t v_out[4][4], const uint8_t *s16) noexcept
{
    // AES Inverse S-box
    static constexpr uint8_t inv_sbox[256] = {
        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, //
        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, //
        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, //
        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, //
        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, //
        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, //
        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, //
        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, //
        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, //
        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, //
        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, //
        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, //
        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, //
        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, //
        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, //
        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};

    for (int col = 0; col < 4; ++col) {
        for (int row = 0; row < 4; ++row) {
            // InvShiftRows: row r rotates right by r, so the byte coming from
            // source column `col` ends up in column (col + row) mod 4.
            const int dst_col = (col + row) % 4;
            v_out[dst_col][row] = inv_sbox[s16[4 * col + row]];
        }
    }
}

/// MixColumns applied in place to all four columns of `v` (v[column][row]).
/// Uses the identity  b_r = a_r ^ (a0^a1^a2^a3) ^ xtime(a_r ^ a_{r+1}).
inline void mix_columns(uint8_t v[4][4]) noexcept
{
    for (int c = 0; c < 4; ++c) {
        // Snapshot the column first: every output byte needs original inputs.
        const uint8_t a0 = v[c][0];
        const uint8_t a1 = v[c][1];
        const uint8_t a2 = v[c][2];
        const uint8_t a3 = v[c][3];
        const uint8_t sum = static_cast<uint8_t>(a0 ^ a1 ^ a2 ^ a3);
        v[c][0] = static_cast<uint8_t>(a0 ^ sum ^ mul2(static_cast<uint8_t>(a0 ^ a1)));
        v[c][1] = static_cast<uint8_t>(a1 ^ sum ^ mul2(static_cast<uint8_t>(a1 ^ a2)));
        v[c][2] = static_cast<uint8_t>(a2 ^ sum ^ mul2(static_cast<uint8_t>(a2 ^ a3)));
        v[c][3] = static_cast<uint8_t>(a3 ^ sum ^ mul2(static_cast<uint8_t>(a3 ^ a0)));
    }
}

/// InvMixColumns applied in place to all four columns of `v` (v[column][row]).
/// Each column is multiplied by the circulant matrix with first row
/// (14, 11, 13, 9). Shared by aesimc() and aesdec().
inline void inv_mix_columns(uint8_t v[4][4]) noexcept
{
    for (int c = 0; c < 4; ++c) {
        // Snapshot the column first: every output byte needs original inputs.
        const uint8_t c0 = v[c][0];
        const uint8_t c1 = v[c][1];
        const uint8_t c2 = v[c][2];
        const uint8_t c3 = v[c][3];
        v[c][0] = static_cast<uint8_t>(mul14(c0) ^ mul11(c1) ^ mul13(c2) ^ mul9(c3));
        v[c][1] = static_cast<uint8_t>(mul9(c0) ^ mul14(c1) ^ mul11(c2) ^ mul13(c3));
        v[c][2] = static_cast<uint8_t>(mul13(c0) ^ mul9(c1) ^ mul14(c2) ^ mul11(c3));
        v[c][3] = static_cast<uint8_t>(mul11(c0) ^ mul13(c1) ^ mul9(c2) ^ mul14(c3));
    }
}
} // namespace details

// ========================================================================
// Public API
// ========================================================================
//
// All functions take 16-byte buffers. The input block `s16` is read in full
// into a local state before anything is written to `d16`, so `d16` may be the
// same buffer as `s16`.

// Kept for backward compatibility: existing code may refer to the helpers as
// AesNiGeneric::mul2 etc. New code should spell out details:: explicitly.
using namespace details;

/// One middle encryption round (modelled on the AESENC instruction):
/// d16 = MixColumns(ShiftRows(SubBytes(s16))) ^ rk16.
inline void aesenc(uint8_t *d16, const uint8_t *s16, const uint8_t *rk16) noexcept
{
    uint8_t v[4][4];
    details::sbox_cnv(v, s16); // ShiftRows + SubBytes
    details::mix_columns(v);   // MixColumns
    details::xor_roundkey(d16, &v[0][0], rk16);
}

/// Final encryption round (modelled on AESENCLAST), which has no MixColumns:
/// d16 = ShiftRows(SubBytes(s16)) ^ rk16.
inline void aesenclast(uint8_t *d16, const uint8_t *s16, const uint8_t *rk16) noexcept
{
    uint8_t v[4][4];
    details::sbox_cnv(v, s16); // ShiftRows + SubBytes
    details::xor_roundkey(d16, &v[0][0], rk16);
}

/// AES Inverse Mix Columns (equivalent to _mm_aesimc_si128):
/// d16 = InvMixColumns(s16).
/// Used for preparing Decryption Round Keys.
inline void aesimc(uint8_t *d16, const uint8_t *s16) noexcept
{
    uint8_t v[4][4];
    uint8_t *flat = &v[0][0];
    for (int i = 0; i < 16; ++i) {
        flat[i] = s16[i];
    }
    details::inv_mix_columns(v);
    for (int i = 0; i < 16; ++i) {
        d16[i] = flat[i];
    }
}

/// One middle decryption round (modelled on the AESDEC instruction):
/// d16 = InvMixColumns(InvSubBytes(InvShiftRows(s16))) ^ rk16.
/// Because the key is XOR-ed after InvMixColumns, `rk16` is expected to be a
/// round key that has already been passed through aesimc().
inline void aesdec(uint8_t *d16, const uint8_t *s16, const uint8_t *rk16) noexcept
{
    uint8_t v[4][4];
    details::inv_sbox_cnv(v, s16); // InvShiftRows + InvSubBytes
    details::inv_mix_columns(v);   // InvMixColumns
    details::xor_roundkey(d16, &v[0][0], rk16);
}

/// Final decryption round (modelled on AESDECLAST), without InvMixColumns:
/// d16 = InvSubBytes(InvShiftRows(s16)) ^ rk16.
inline void aesdeclast(uint8_t *d16, const uint8_t *s16, const uint8_t *rk16) noexcept
{
    uint8_t v[4][4];
    details::inv_sbox_cnv(v, s16); // InvShiftRows + InvSubBytes
    details::xor_roundkey(d16, &v[0][0], rk16);
}
} // namespace AesNiGeneric
| #endif // AESNI_GENERIC_HPP_INCLUDED |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment