Feat: vectorize the for loop with SIMD
This commit is contained in:
parent
8c997087e9
commit
88ee99b37c
1 changed files with 80 additions and 20 deletions
|
@ -1,12 +1,17 @@
|
|||
#include "../../lib/md2_impls/md2_1.h"
|
||||
|
||||
#include "../../lib/md2_impls/md2_common.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
void md2_checksum_1(size_t len, uint8_t* buf) {
|
||||
|
||||
void md2_checksum_1(size_t len, uint8_t *buf)
|
||||
{
|
||||
uint8_t l = 0;
|
||||
|
||||
for (size_t i = 0; i < len / 16; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
for (size_t i = 0; i < len / 16; i++)
|
||||
{
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
u_int8_t c = buf[i * 16 + j];
|
||||
// The reference is wrong. It says: Set C[j] to S[c xor L]. But it should be: Set C[j] to C[j] xor S[c xor L].
|
||||
buf[len + j] ^= MD2_PI_SUBST[c ^ l];
|
||||
|
@ -15,27 +20,50 @@ void md2_checksum_1(size_t len, uint8_t* buf) {
|
|||
}
|
||||
}
|
||||
|
||||
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
||||
/*
 * MD2 padding lookup table.
 *
 * Row n starts with n bytes of value n and is zero-filled up to 16 bytes,
 * so copying the first n bytes of PADDING[n] appends n padding bytes that
 * each carry the value n. Row 0 is all zeros; row 16 is a full block of 16s.
 *
 * The table is only read in the code visible here (as a memcpy source),
 * so it is declared const to keep it immutable.
 */
static const uint8_t PADDING[17][16] = {
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0},
    {9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0},
    {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0},
    {11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 0},
    {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0},
    {13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 0, 0},
    {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0},
    {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0},
    {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}};
|
||||
|
||||
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
|
||||
{
|
||||
// === step 1 ===
|
||||
int paddingNeeded = 16 - (len & 7);
|
||||
uint8_t originalPadding = paddingNeeded;
|
||||
int paddingNeeded = 16 - (len & 15);
|
||||
len += paddingNeeded;
|
||||
|
||||
// printf("len: %d\n", len);
|
||||
|
||||
// +16 for the checksum
|
||||
uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t));
|
||||
uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16));
|
||||
|
||||
for(size_t i = 0; i < 16; i++) {
|
||||
newBuf[len + i] = 0;
|
||||
}
|
||||
|
||||
// uint8_t* newBuf = calloc(len + 16, sizeof(uint8_t));
|
||||
// TODO: null check
|
||||
memcpy(newBuf, buf, len - paddingNeeded);
|
||||
|
||||
// printBuf(len + 16, newBuf);
|
||||
//md2_print_buf(len + 16, newBuf);
|
||||
|
||||
memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded);
|
||||
|
||||
while (paddingNeeded > 0) {
|
||||
newBuf[len - paddingNeeded] = originalPadding;
|
||||
paddingNeeded--;
|
||||
}
|
||||
// printf("buf with padding: ");
|
||||
// printBuf(len + 16, newBuf);
|
||||
//md2_print_buf(len + 16, newBuf);
|
||||
|
||||
// === step 2 ===
|
||||
md2_checksum_1(len, newBuf);
|
||||
|
@ -44,25 +72,57 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
|||
// printBuf(len + 16, newBuf);
|
||||
|
||||
// === step 3 ===
|
||||
uint8_t* messageDigestBuf = calloc(48, sizeof(uint8_t));
|
||||
uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48);
|
||||
// TODO: add null check
|
||||
for (size_t i = 0; i < 48; i++) {
|
||||
messageDigestBuf[i] = 0;
|
||||
}
|
||||
|
||||
// === step 4 ===
|
||||
// <= because we need to hash the last block too
|
||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
messageDigestBuf[16 + j] = newBuf[i * 16 + j];
|
||||
__m128i vx;
|
||||
__m128i vy;
|
||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++)
|
||||
{
|
||||
|
||||
vx = _mm_load_si128((__m128i*) (newBuf + i * 16));
|
||||
_mm_store_si128((__m128i*) (messageDigestBuf + 16), vx);
|
||||
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
|
||||
vy = _mm_xor_si128(vy, vx);
|
||||
_mm_store_si128((__m128i*) (messageDigestBuf + 32), vy);
|
||||
|
||||
|
||||
/*
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
//messageDigestBuf[16 + j] = newBuf[i * 16 + j];
|
||||
messageDigestBuf[32 + j] =
|
||||
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
||||
}
|
||||
/*
|
||||
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
|
||||
_mm_xor_si128(vy, vx);
|
||||
_mm_store_si128((__m128i*) messageDigestBuf, vy);
|
||||
md2_print_buf(48, messageDigestBuf);
|
||||
md2_print_buf(len + 16, newBuf);
|
||||
/*
|
||||
printf("newBuf: erstes Element:");
|
||||
printf(*(newBuf + i * 16));
|
||||
printf("mdb:");
|
||||
printf(*(messageDigestBuf + 16));
|
||||
*/
|
||||
|
||||
|
||||
|
||||
u_int8_t t = 0;
|
||||
|
||||
for (int j = 0; j < 18; j++) {
|
||||
for (int k = 0; k < 48; k++) {
|
||||
for (int j = 0; j < 18; j++)
|
||||
{
|
||||
for (int k = 0; k < 48; k++)
|
||||
{
|
||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||
}
|
||||
t = (t + j) & 127;
|
||||
t = (t + j) & 255;
|
||||
}
|
||||
}
|
||||
// printf("messageDigestBuf: \n");
|
||||
|
|
Loading…
Reference in a new issue