Feat: SIMD of for loop
This commit is contained in:
parent
8c997087e9
commit
88ee99b37c
1 changed files with 80 additions and 20 deletions
|
@ -1,12 +1,17 @@
|
||||||
#include "../../lib/md2_impls/md2_1.h"
|
#include "../../lib/md2_impls/md2_1.h"
|
||||||
|
|
||||||
#include "../../lib/md2_impls/md2_common.h"
|
#include "../../lib/md2_impls/md2_common.h"
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
void md2_checksum_1(size_t len, uint8_t* buf) {
|
|
||||||
|
void md2_checksum_1(size_t len, uint8_t *buf)
|
||||||
|
{
|
||||||
uint8_t l = 0;
|
uint8_t l = 0;
|
||||||
|
|
||||||
for (size_t i = 0; i < len / 16; i++) {
|
for (size_t i = 0; i < len / 16; i++)
|
||||||
for (int j = 0; j < 16; j++) {
|
{
|
||||||
|
for (int j = 0; j < 16; j++)
|
||||||
|
{
|
||||||
u_int8_t c = buf[i * 16 + j];
|
u_int8_t c = buf[i * 16 + j];
|
||||||
// reference is wrong. It says: Set C[j] to S[c xor L]. But it should be:
|
// reference is wrong. It says: Set C[j] to S[c xor L]. But it should be:
|
||||||
buf[len + j] ^= MD2_PI_SUBST[c ^ l];
|
buf[len + j] ^= MD2_PI_SUBST[c ^ l];
|
||||||
|
@ -15,27 +20,50 @@ void md2_checksum_1(size_t len, uint8_t* buf) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
static uint8_t PADDING[17][16] = {
|
||||||
|
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0},
|
||||||
|
{10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0},
|
||||||
|
{11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 0},
|
||||||
|
{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0},
|
||||||
|
{13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 0, 0},
|
||||||
|
{14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0},
|
||||||
|
{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0},
|
||||||
|
{16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}};
|
||||||
|
|
||||||
|
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
|
||||||
|
{
|
||||||
// === step 1 ===
|
// === step 1 ===
|
||||||
int paddingNeeded = 16 - (len & 7);
|
int paddingNeeded = 16 - (len & 15);
|
||||||
uint8_t originalPadding = paddingNeeded;
|
|
||||||
len += paddingNeeded;
|
len += paddingNeeded;
|
||||||
|
|
||||||
// printf("len: %d\n", len);
|
// printf("len: %d\n", len);
|
||||||
|
|
||||||
// +16 for the checksum
|
// +16 for the checksum
|
||||||
uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t));
|
uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16));
|
||||||
|
|
||||||
|
for(size_t i = 0; i < 16; i++) {
|
||||||
|
newBuf[len + i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t));
|
||||||
// TODO: null check
|
// TODO: null check
|
||||||
memcpy(newBuf, buf, len - paddingNeeded);
|
memcpy(newBuf, buf, len - paddingNeeded);
|
||||||
|
|
||||||
// printBuf(len + 16, newBuf);
|
//md2_print_buf(len + 16, newBuf);
|
||||||
|
|
||||||
|
memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded);
|
||||||
|
|
||||||
while (paddingNeeded > 0) {
|
|
||||||
newBuf[len - paddingNeeded] = originalPadding;
|
|
||||||
paddingNeeded--;
|
|
||||||
}
|
|
||||||
// printf("buf with padding: ");
|
// printf("buf with padding: ");
|
||||||
// printBuf(len + 16, newBuf);
|
//md2_print_buf(len + 16, newBuf);
|
||||||
|
|
||||||
// === step 2 ===
|
// === step 2 ===
|
||||||
md2_checksum_1(len, newBuf);
|
md2_checksum_1(len, newBuf);
|
||||||
|
@ -44,25 +72,57 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
||||||
// printBuf(len + 16, newBuf);
|
// printBuf(len + 16, newBuf);
|
||||||
|
|
||||||
// === step 3 ===
|
// === step 3 ===
|
||||||
uint8_t* messageDigestBuf = calloc(48, sizeof(uint8_t));
|
uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48);
|
||||||
// TODO: add null check
|
// TODO: add null check
|
||||||
|
for (size_t i = 0; i < 48; i++) {
|
||||||
|
messageDigestBuf[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// === step 4 ===
|
// === step 4 ===
|
||||||
// <= because we need to hash the last block too
|
// <= because we need to hash the last block too
|
||||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
|
__m128i vx;
|
||||||
for (int j = 0; j < 16; j++) {
|
__m128i vy;
|
||||||
messageDigestBuf[16 + j] = newBuf[i * 16 + j];
|
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++)
|
||||||
|
{
|
||||||
|
|
||||||
|
vx = _mm_load_si128((__m128i*) (newBuf + i * 16));
|
||||||
|
_mm_store_si128((__m128i*) (messageDigestBuf + 16), vx);
|
||||||
|
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
|
||||||
|
vy = _mm_xor_si128(vy, vx);
|
||||||
|
_mm_store_si128((__m128i*) (messageDigestBuf + 32), vy);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
for (int j = 0; j < 16; j++)
|
||||||
|
{
|
||||||
|
//messageDigestBuf[16 + j] = newBuf[i * 16 + j];
|
||||||
messageDigestBuf[32 + j] =
|
messageDigestBuf[32 + j] =
|
||||||
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
|
||||||
|
_mm_xor_si128(vy, vx);
|
||||||
|
_mm_store_si128((__m128i*) messageDigestBuf, vy);
|
||||||
|
md2_print_buf(48, messageDigestBuf);
|
||||||
|
md2_print_buf(len + 16, newBuf);
|
||||||
|
/*
|
||||||
|
printf("newBuf: erstes Element:");
|
||||||
|
printf(*(newBuf + i * 16));
|
||||||
|
printf("mdb:");
|
||||||
|
printf(*(messageDigestBuf + 16));
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
u_int8_t t = 0;
|
u_int8_t t = 0;
|
||||||
|
|
||||||
for (int j = 0; j < 18; j++) {
|
for (int j = 0; j < 18; j++)
|
||||||
for (int k = 0; k < 48; k++) {
|
{
|
||||||
|
for (int k = 0; k < 48; k++)
|
||||||
|
{
|
||||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||||
}
|
}
|
||||||
t = (t + j) & 127;
|
t = (t + j) & 255;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// printf("messageDigestBuf: \n");
|
// printf("messageDigestBuf: \n");
|
||||||
|
|
Loading…
Reference in a new issue