Feat: Detailed debugging tools
This commit is contained in:
parent
aa0f1e9170
commit
6cbadae0f2
10 changed files with 137 additions and 31 deletions
5
.vscode/tasks.json
vendored
5
.vscode/tasks.json
vendored
|
@ -9,8 +9,11 @@
|
|||
"-g",
|
||||
"${fileDirname}/*.c",
|
||||
"${fileDirname}/md2_impls/*.c",
|
||||
"${fileDirname}/md2_impls/md2_reference/*.c",
|
||||
"-o",
|
||||
"${fileDirname}/../md2"
|
||||
"${fileDirname}/../md2",
|
||||
"-D",
|
||||
"MD2_DETAILED_BENCHMARK"
|
||||
],
|
||||
"options": {
|
||||
"cwd": "${fileDirname}"
|
||||
|
|
|
@ -3,7 +3,11 @@
|
|||
SRC = src/main.c src/helper.c src/io.c src/md2.c src/md2_impls/md2_common.c src/md2_impls/md2_0.c src/md2_impls/md2_1.c src/md2_impls/md2_2.c src/md2_impls/md2_3.c src/md2_impls/md2_reference/md2_reference.c
|
||||
OBJ = ${subst src,build,${SRC:.c=.o}}
|
||||
CC = gcc
|
||||
CFLAGS = -Ilib -ggdb -std=c11 -g -Wall -Wextra -no-pie -O3
|
||||
DETAILED_BENCHMARK ?= 0
|
||||
CFLAGS = -Ilib -ggdb -std=c11 -g -Wall -Wextra -no-pie -O3
|
||||
ifeq ($(DETAILED_BENCHMARK), true)
|
||||
CFLAGS += -DMD2_DETAILED_BENCHMARK
|
||||
endif
|
||||
LDFLAGS = -pthread
|
||||
TESTFILES = t/1 t/2 t/5 t/10 t/20 t/50 t/100 t/1000 t/2000 t/5000 t/10000
|
||||
TESTFILES_SIZES = ${subst t/,,${TESTFILES}}
|
||||
|
@ -22,6 +26,8 @@ help:
|
|||
@echo
|
||||
@echo Available targets:
|
||||
@echo - all: build everything
|
||||
@echo " When DETAILED_BENCHMARK=true is passed, the detailed benchmarks are enabled."
|
||||
@echo " This will decrease the overall performance of the program."
|
||||
@echo - clean: clean distfiles
|
||||
@echo - help: show this help
|
||||
@echo - benchmarks: run benchmarks
|
||||
|
|
|
@ -36,6 +36,8 @@ struct configuration {
|
|||
enum argumentParseResult parseArguments(int argc, char** argv,
|
||||
struct configuration* c);
|
||||
|
||||
double current_time();
|
||||
|
||||
/**
|
||||
* @brief Run an md2_hash_func with benchmark timing
|
||||
*
|
||||
|
|
|
@ -9,6 +9,42 @@
|
|||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "../helper.h"
|
||||
|
||||
/*
 * Detailed-benchmark markers.
 *
 * The *_MARK macros below are written inside function bodies WITHOUT a
 * trailing semicolon, so every expansion has to bring its own ';'.
 *
 * With MD2_DETAILED_BENCHMARK defined they call the benchmark functions
 * declared in this header; without it they expand to an empty statement.
 */
#ifdef MD2_DETAILED_BENCHMARK
#define md2_process_detailed_benchmark_step_if_defined(step) \
  md2_process_detailed_benchmark_step(step);

#define md2_print_detailed_benchmark_result_if_defined \
  md2_print_detailed_benchmark_result();
#else
/*
 * The disabled variants are still exactly one statement. If they expanded to
 * nothing, `if (cond) END_MARK` would guard the marker in benchmark builds
 * but the *following* statement in normal builds.
 */
#define md2_process_detailed_benchmark_step_if_defined(step) ((void)0);
#define md2_print_detailed_benchmark_result_if_defined ((void)0);
#endif  // MD2_DETAILED_BENCHMARK

/* Markers around the checksum computation. */
#define CHECKSUM_START_MARK \
  md2_process_detailed_benchmark_step_if_defined(CHECKSUM_START)
#define CHECKSUM_END_MARK \
  md2_process_detailed_benchmark_step_if_defined(CHECKSUM_END)

/* Markers around the first (block copy / XOR) loop of the hash step. */
#define FIRST_LOOP_START_MARK \
  md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_START)
#define FIRST_LOOP_END_MARK \
  md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_END)

/* Markers around the second (18-round substitution) loop of the hash step. */
#define SECOND_LOOP_START_MARK \
  md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_START)
#define SECOND_LOOP_END_MARK \
  md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_END)

/* Placed once a hash is finished: prints the collected timings. */
#define END_MARK md2_print_detailed_benchmark_result_if_defined
|
||||
|
||||
/**
 * @brief Markers for the start and end of each timed phase
 *
 * The values are laid out in START/END pairs: every *_START is even and its
 * matching *_END is the next odd value. md2_process_detailed_benchmark_step
 * indexes its tables with `step`, `step - 1` and `step / 2`, so this ordering
 * must be kept when adding phases.
 */
enum md2_detailed_benchmark_step {
  CHECKSUM_START = 0,
  CHECKSUM_END = 1,
  FIRST_LOOP_START = 2,
  FIRST_LOOP_END = 3,
  SECOND_LOOP_START = 4,
  SECOND_LOOP_END = 5
};
|
||||
|
||||
/**
|
||||
* @brief Some digits of pi
|
||||
*
|
||||
|
@ -23,4 +59,8 @@ extern unsigned char MD2_PI_SUBST[256];
|
|||
*/
|
||||
void md2_print_buf(size_t len, uint8_t buf[len]);
|
||||
|
||||
/**
 * @brief Record that a detailed-benchmark marker has been reached
 *
 * @param step the marker that was just passed (start or end of a phase)
 */
void md2_process_detailed_benchmark_step(enum md2_detailed_benchmark_step step);

/**
 * @brief Print the timings collected by the detailed benchmark
 *
 * Declared with (void) so that the compiler rejects calls with arguments;
 * in C11 an empty parameter list would leave the parameters unspecified.
 */
void md2_print_detailed_benchmark_result(void);
|
||||
|
||||
#endif // MD2_COMMON_H
|
|
@ -30,8 +30,13 @@ void help(char *progname) {
|
|||
1: optimized implementation\n\
|
||||
2: memory efficient implementation\n\
|
||||
3: threaded implementation\n\
|
||||
4: reference implementation\n\
|
||||
\n",
|
||||
4: reference implementation\n"
|
||||
#ifdef MD2_DETAILED_BENCHMARK
|
||||
"\n\
|
||||
\n\033[1;33mWARNING: Detailed benchmarking is enabled. This will show detailed benchmaring results\n\
|
||||
after each hash calculation but will decrease the overall performance.\033[0m"
|
||||
#endif // MD2_DETAILED_BENCHMARK
|
||||
"\n",
|
||||
progname);
|
||||
}
|
||||
|
||||
|
@ -103,7 +108,7 @@ enum argumentParseResult parseArguments(int argc, char **argv,
|
|||
return RESULT_OK;
|
||||
}
|
||||
|
||||
double current_time(void) {
|
||||
double current_time() {
|
||||
struct timespec t;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t);
|
||||
return t.tv_sec + t.tv_nsec * 1e-9;
|
||||
|
|
|
@ -40,7 +40,9 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
|||
// printBuf(len + 16, newBuf);
|
||||
|
||||
// === step 2 ===
|
||||
CHECKSUM_START_MARK
|
||||
md2_checksum_0(len, newBuf);
|
||||
CHECKSUM_END_MARK
|
||||
|
||||
// printf("buf with checksum: ");
|
||||
// printBuf(len + 16, newBuf);
|
||||
|
@ -51,28 +53,32 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
// === step 4 ===
|
||||
// <= because we need to hash the last block too
|
||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
|
||||
FIRST_LOOP_START_MARK
|
||||
for (int j = 0; j < 16; j++) {
|
||||
messageDigestBuf[16 + j] = newBuf[i * 16 + j];
|
||||
messageDigestBuf[32 + j] =
|
||||
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
||||
}
|
||||
FIRST_LOOP_END_MARK
|
||||
|
||||
u_int8_t t = 0;
|
||||
|
||||
SECOND_LOOP_START_MARK
|
||||
for (int j = 0; j < 18; j++) {
|
||||
for (int k = 0; k < 48; k++) {
|
||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||
}
|
||||
t = (t + j) % 256;
|
||||
}
|
||||
SECOND_LOOP_END_MARK
|
||||
}
|
||||
// printf("messageDigestBuf: \n");
|
||||
// printBuf(16, messageDigestBuf);
|
||||
|
||||
END_MARK
|
||||
memcpy(out, messageDigestBuf, 16);
|
||||
|
||||
free(messageDigestBuf);
|
||||
|
|
|
@ -1,17 +1,14 @@
|
|||
#include "../../lib/md2_impls/md2_1.h"
|
||||
|
||||
#include "../../lib/md2_impls/md2_common.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "../../lib/md2_impls/md2_common.h"
|
||||
|
||||
void md2_checksum_1(size_t len, uint8_t *buf)
|
||||
{
|
||||
void md2_checksum_1(size_t len, uint8_t *buf) {
|
||||
uint8_t l = 0;
|
||||
|
||||
for (size_t i = 0; i < len / 16; i++)
|
||||
{
|
||||
for (int j = 0; j < 16; j++)
|
||||
{
|
||||
for (size_t i = 0; i < len / 16; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
u_int8_t c = buf[i * 16 + j];
|
||||
// reference is wrong. It says: Set C[j] to S[c xor L]. But it should be:
|
||||
buf[len + j] ^= MD2_PI_SUBST[c ^ l];
|
||||
|
@ -39,18 +36,17 @@ static uint8_t PADDING[17][16] = {
|
|||
{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0},
|
||||
{16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}};
|
||||
|
||||
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
|
||||
{
|
||||
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
||||
// === step 1 ===
|
||||
int paddingNeeded = 16 - (len & 15);
|
||||
len += paddingNeeded;
|
||||
|
||||
uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16));
|
||||
uint8_t *newBuf = aligned_alloc(16, sizeof(uint8_t) * (len + 16));
|
||||
if (newBuf == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < 16; i++) {
|
||||
for (size_t i = 0; i < 16; i++) {
|
||||
newBuf[len + i] = 0;
|
||||
}
|
||||
|
||||
|
@ -59,7 +55,9 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
|
|||
memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded);
|
||||
|
||||
// === step 2 ===
|
||||
CHECKSUM_START_MARK
|
||||
md2_checksum_1(len, newBuf);
|
||||
CHECKSUM_END_MARK
|
||||
|
||||
// === step 3 ===
|
||||
uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48);
|
||||
|
@ -74,27 +72,28 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
|
|||
// === step 4 ===
|
||||
__m128i vx;
|
||||
__m128i vy;
|
||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++)
|
||||
{
|
||||
vx = _mm_load_si128((__m128i*) (newBuf + i * 16));
|
||||
_mm_store_si128((__m128i*) (messageDigestBuf + 16), vx);
|
||||
vy = _mm_load_si128((__m128i*) (messageDigestBuf));
|
||||
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
|
||||
FIRST_LOOP_START_MARK
|
||||
vx = _mm_load_si128((__m128i *)(newBuf + i * 16));
|
||||
_mm_store_si128((__m128i *)(messageDigestBuf + 16), vx);
|
||||
vy = _mm_load_si128((__m128i *)(messageDigestBuf));
|
||||
vy = _mm_xor_si128(vy, vx);
|
||||
_mm_store_si128((__m128i*) (messageDigestBuf + 32), vy);
|
||||
|
||||
_mm_store_si128((__m128i *)(messageDigestBuf + 32), vy);
|
||||
FIRST_LOOP_END_MARK
|
||||
|
||||
u_int8_t t = 0;
|
||||
|
||||
for (int j = 0; j < 18; j++)
|
||||
{
|
||||
for (int k = 0; k < 48; k++)
|
||||
{
|
||||
SECOND_LOOP_START_MARK
|
||||
for (int j = 0; j < 18; j++) {
|
||||
for (int k = 0; k < 48; k++) {
|
||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||
}
|
||||
t = (t + j) & 255;
|
||||
}
|
||||
SECOND_LOOP_END_MARK
|
||||
}
|
||||
|
||||
END_MARK
|
||||
memcpy(out, messageDigestBuf, 16);
|
||||
|
||||
free(messageDigestBuf);
|
||||
|
|
|
@ -3,19 +3,23 @@
|
|||
#include "../../lib/md2_impls/md2_common.h"
|
||||
|
||||
void process_block_hash(uint8_t block[16], uint8_t messageDigestBuf[48]) {
|
||||
FIRST_LOOP_START_MARK
|
||||
for (int j = 0; j < 16; j++) {
|
||||
messageDigestBuf[16 + j] = block[j];
|
||||
messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
||||
}
|
||||
FIRST_LOOP_END_MARK
|
||||
|
||||
u_int8_t t = 0;
|
||||
|
||||
SECOND_LOOP_START_MARK
|
||||
for (int j = 0; j < 18; j++) {
|
||||
for (int k = 0; k < 48; k++) {
|
||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||
}
|
||||
t = (t + j) % 256;
|
||||
}
|
||||
SECOND_LOOP_END_MARK
|
||||
}
|
||||
|
||||
void process_block_checksum(uint8_t block[16], uint8_t checksum[16],
|
||||
|
@ -74,21 +78,24 @@ void md2_hash_2(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
|||
return;
|
||||
}
|
||||
|
||||
CHECKSUM_START_MARK
|
||||
process_block_checksum(data, checksum, &l);
|
||||
CHECKSUM_END_MARK
|
||||
process_block_hash(data, messageDigestBuf);
|
||||
|
||||
bytes_left_to_read -= bytes_left_to_process;
|
||||
};
|
||||
|
||||
fclose(file);
|
||||
|
||||
apply_padding(bytes_left_to_process % 16, data);
|
||||
process_block_checksum(data, checksum, &l);
|
||||
process_block_hash(data, messageDigestBuf);
|
||||
|
||||
process_block_hash(checksum, messageDigestBuf);
|
||||
|
||||
END_MARK
|
||||
memcpy(out, messageDigestBuf, 16);
|
||||
|
||||
fclose(file);
|
||||
free(data);
|
||||
free(messageDigestBuf);
|
||||
free(checksum);
|
||||
|
|
|
@ -10,20 +10,24 @@ struct thread_args {
|
|||
void process_nothread_hash(size_t len, const uint8_t buf[len],
|
||||
uint8_t messageDigestBuf[48]) {
|
||||
for (size_t i = 0; i < (len + 16) / 16 - 1; i++) {
|
||||
FIRST_LOOP_START_MARK
|
||||
for (int j = 0; j < 16; j++) {
|
||||
messageDigestBuf[16 + j] = buf[i * 16 + j];
|
||||
messageDigestBuf[32 + j] =
|
||||
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
|
||||
}
|
||||
FIRST_LOOP_END_MARK
|
||||
|
||||
u_int8_t t = 0;
|
||||
|
||||
SECOND_LOOP_START_MARK
|
||||
for (int j = 0; j < 18; j++) {
|
||||
for (int k = 0; k < 48; k++) {
|
||||
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
|
||||
}
|
||||
t = (t + j) % 256;
|
||||
}
|
||||
SECOND_LOOP_END_MARK
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,7 +52,7 @@ void* process_checksum(void* threasdArgs) {
|
|||
}
|
||||
|
||||
uint8_t l = 0;
|
||||
|
||||
CHECKSUM_START_MARK
|
||||
for (size_t i = 0; i < args->len / 16; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
u_int8_t c = args->buf[i * 16 + j];
|
||||
|
@ -56,6 +60,7 @@ void* process_checksum(void* threasdArgs) {
|
|||
l = checksum[j] ^= MD2_PI_SUBST[c ^ l];
|
||||
}
|
||||
}
|
||||
CHECKSUM_END_MARK
|
||||
pthread_exit(checksum);
|
||||
}
|
||||
|
||||
|
@ -107,6 +112,8 @@ void md2_hash_3(size_t len, const uint8_t buf[len], uint8_t out[16]) {
|
|||
}
|
||||
|
||||
process_nothread_hash(16, checksum, messageDigestBuf);
|
||||
|
||||
END_MARK
|
||||
memcpy(out, messageDigestBuf, 16);
|
||||
|
||||
free(messageDigestBuf);
|
||||
|
|
|
@ -26,4 +26,35 @@ void md2_print_buf(size_t len, uint8_t buf[len]) {
|
|||
printf("'%02x',", buf[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static double detailed_benchmark_timestamps[6] = {0, 0, 0, 0, 0, 0};
|
||||
static double detailed_benchmark_times[3] = {0, 0, 0};
|
||||
|
||||
void md2_process_detailed_benchmark_step(
|
||||
enum md2_detailed_benchmark_step step) {
|
||||
switch (step) {
|
||||
case CHECKSUM_START:
|
||||
case FIRST_LOOP_START:
|
||||
case SECOND_LOOP_START:
|
||||
detailed_benchmark_timestamps[step] = current_time();
|
||||
return;
|
||||
|
||||
case CHECKSUM_END:
|
||||
case FIRST_LOOP_END:
|
||||
case SECOND_LOOP_END:
|
||||
detailed_benchmark_timestamps[step] = current_time();
|
||||
detailed_benchmark_times[step / 2] +=
|
||||
detailed_benchmark_timestamps[step] -
|
||||
detailed_benchmark_timestamps[step - 1];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void md2_print_detailed_benchmark_result() {
|
||||
printf("Detailed benchmarking results:\n");
|
||||
printf(" Checksum: %f\n", detailed_benchmark_times[0]);
|
||||
printf(" First loop: %f\n", detailed_benchmark_times[1]);
|
||||
printf(" Second loop: %f\n", detailed_benchmark_times[2]);
|
||||
printf("\n");
|
||||
}
|
Loading…
Reference in a new issue