Feat: vectorize the for loop with SIMD

This commit is contained in:
finn 2022-07-18 21:22:03 +02:00
parent 8c997087e9
commit 88ee99b37c

View file

@ -1,12 +1,17 @@
#include "../../lib/md2_impls/md2_1.h" #include "../../lib/md2_impls/md2_1.h"
#include "../../lib/md2_impls/md2_common.h" #include "../../lib/md2_impls/md2_common.h"
#include <immintrin.h>
void md2_checksum_1(size_t len, uint8_t* buf) {
void md2_checksum_1(size_t len, uint8_t *buf)
{
uint8_t l = 0; uint8_t l = 0;
for (size_t i = 0; i < len / 16; i++) { for (size_t i = 0; i < len / 16; i++)
for (int j = 0; j < 16; j++) { {
for (int j = 0; j < 16; j++)
{
u_int8_t c = buf[i * 16 + j]; u_int8_t c = buf[i * 16 + j];
// reference is wrong. It says: Set C[j] to S[c xor L]. But it should be: // reference is wrong. It says: Set C[j] to S[c xor L]. But it should be:
buf[len + j] ^= MD2_PI_SUBST[c ^ l]; buf[len + j] ^= MD2_PI_SUBST[c ^ l];
@ -15,27 +20,50 @@ void md2_checksum_1(size_t len, uint8_t* buf) {
} }
} }
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) { static uint8_t PADDING[17][16] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0},
{9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0},
{10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0},
{11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 0},
{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0},
{13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 0, 0},
{14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0},
{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0},
{16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}};
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
{
// === step 1 === // === step 1 ===
int paddingNeeded = 16 - (len & 7); int paddingNeeded = 16 - (len & 15);
uint8_t originalPadding = paddingNeeded;
len += paddingNeeded; len += paddingNeeded;
// printf("len: %d\n", len); // printf("len: %d\n", len);
// +16 for the checksum // +16 for the checksum
uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t)); uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16));
for(size_t i = 0; i < 16; i++) {
newBuf[len + i] = 0;
}
// uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t));
// TODO: null check // TODO: null check
memcpy(newBuf, buf, len - paddingNeeded); memcpy(newBuf, buf, len - paddingNeeded);
// printBuf(len + 16, newBuf); //md2_print_buf(len + 16, newBuf);
memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded);
while (paddingNeeded > 0) {
newBuf[len - paddingNeeded] = originalPadding;
paddingNeeded--;
}
// printf("buf with padding: "); // printf("buf with padding: ");
// printBuf(len + 16, newBuf); //md2_print_buf(len + 16, newBuf);
// === step 2 === // === step 2 ===
md2_checksum_1(len, newBuf); md2_checksum_1(len, newBuf);
@ -44,25 +72,57 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
// printBuf(len + 16, newBuf); // printBuf(len + 16, newBuf);
// === step 3 === // === step 3 ===
uint8_t* messageDigestBuf = calloc(48, sizeof(uint8_t)); uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48);
// TODO: add null check // TODO: add null check
for (size_t i = 0; i < 48; i++) {
messageDigestBuf[i] = 0;
}
// === step 4 === // === step 4 ===
// <= because we need to hash the last block too // <= because we need to hash the last block too
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) { __m128i vx;
for (int j = 0; j < 16; j++) { __m128i vy;
messageDigestBuf[16 + j] = newBuf[i * 16 + j]; for (size_t i = 0; i <= (len + 16) / 16 - 1; i++)
{
vx = _mm_load_si128((__m128i*) (newBuf + i * 16));
_mm_store_si128((__m128i*) (messageDigestBuf + 16), vx);
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
vy = _mm_xor_si128(vy, vx);
_mm_store_si128((__m128i*) (messageDigestBuf + 32), vy);
/*
for (int j = 0; j < 16; j++)
{
//messageDigestBuf[16 + j] = newBuf[i * 16 + j];
messageDigestBuf[32 + j] = messageDigestBuf[32 + j] =
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]); (messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
} }
/*
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
_mm_xor_si128(vy, vx);
_mm_store_si128((__m128i*) messageDigestBuf, vy);
md2_print_buf(48, messageDigestBuf);
md2_print_buf(len + 16, newBuf);
/*
printf("newBuf: erstes Element:");
printf(*(newBuf + i * 16));
printf("mdb:");
printf(*(messageDigestBuf + 16));
*/
u_int8_t t = 0; u_int8_t t = 0;
for (int j = 0; j < 18; j++) { for (int j = 0; j < 18; j++)
for (int k = 0; k < 48; k++) { {
for (int k = 0; k < 48; k++)
{
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
} }
t = (t + j) & 127; t = (t + j) & 255;
} }
} }
// printf("messageDigestBuf: \n"); // printf("messageDigestBuf: \n");