diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5ce3840..7a01d1b 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,8 +9,11 @@ "-g", "${fileDirname}/*.c", "${fileDirname}/md2_impls/*.c", + "${fileDirname}/md2_impls/md2_reference/*.c", "-o", - "${fileDirname}/../md2" + "${fileDirname}/../md2", + "-D", + "MD2_DETAILED_BENCHMARK" ], "options": { "cwd": "${fileDirname}" diff --git a/Implementierung/Makefile b/Implementierung/Makefile index 6b768c7..c18abf3 100644 --- a/Implementierung/Makefile +++ b/Implementierung/Makefile @@ -3,7 +3,11 @@ SRC = src/main.c src/helper.c src/io.c src/md2.c src/md2_impls/md2_common.c src/md2_impls/md2_0.c src/md2_impls/md2_1.c src/md2_impls/md2_2.c src/md2_impls/md2_3.c src/md2_impls/md2_reference/md2_reference.c OBJ = ${subst src,build,${SRC:.c=.o}} CC = gcc -CFLAGS = -Ilib -ggdb -std=c11 -g -Wall -Wextra -no-pie -O3 +DETAILED_BENCHMARK ?= 0 +CFLAGS = -Ilib -ggdb -std=c11 -g -Wall -Wextra -no-pie -O3 +ifeq ($(DETAILED_BENCHMARK), true) + CFLAGS += -DMD2_DETAILED_BENCHMARK +endif LDFLAGS = -pthread TESTFILES = t/1 t/2 t/5 t/10 t/20 t/50 t/100 t/1000 t/2000 t/5000 t/10000 TESTFILES_SIZES = ${subst t/,,${TESTFILES}} @@ -22,6 +26,8 @@ help: @echo @echo Available targets: @echo - all: build everything + @echo " When DETAILED_BENCHMARK=true is passed, the detailed benchmarks are enabled." + @echo " This will decrease the overall performance of the program." 
@echo - clean: clean distfiles @echo - help: show this help @echo - benchmarks: run benchmarks diff --git a/Implementierung/lib/helper.h b/Implementierung/lib/helper.h index a368d6e..55c5483 100644 --- a/Implementierung/lib/helper.h +++ b/Implementierung/lib/helper.h @@ -36,6 +36,8 @@ struct configuration { enum argumentParseResult parseArguments(int argc, char** argv, struct configuration* c); +double current_time(); + /** * @brief Run an md2_hash_func with benchmark timing * diff --git a/Implementierung/lib/md2_impls/md2_common.h b/Implementierung/lib/md2_impls/md2_common.h index c66e756..dad4801 100644 --- a/Implementierung/lib/md2_impls/md2_common.h +++ b/Implementierung/lib/md2_impls/md2_common.h @@ -9,6 +9,42 @@ #include #include +#include "../helper.h" + +#ifdef MD2_DETAILED_BENCHMARK +#define md2_process_detailed_benchmark_step_if_defined(step) \ + md2_process_detailed_benchmark_step(step); + +#define md2_print_detailed_benchmark_result_if_defined \ + md2_print_detailed_benchmark_result(); +#else +#define md2_process_detailed_benchmark_step_if_defined(step) +#define md2_print_detailed_benchmark_result_if_defined +#endif // MD2_DETAILED_BENCHMARK + +#define CHECKSUM_START_MARK \ + md2_process_detailed_benchmark_step_if_defined(CHECKSUM_START) +#define CHECKSUM_END_MARK \ + md2_process_detailed_benchmark_step_if_defined(CHECKSUM_END) +#define FIRST_LOOP_START_MARK \ + md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_START) +#define FIRST_LOOP_END_MARK \ + md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_END) +#define SECOND_LOOP_START_MARK \ + md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_START) +#define SECOND_LOOP_END_MARK \ + md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_END) +#define END_MARK md2_print_detailed_benchmark_result_if_defined + +enum md2_detailed_benchmark_step { + CHECKSUM_START = 0, + CHECKSUM_END, + FIRST_LOOP_START, + FIRST_LOOP_END, + SECOND_LOOP_START, + SECOND_LOOP_END +}; + /** * @brief Some 
digits of pi * @@ -23,4 +59,8 @@ extern unsigned char MD2_PI_SUBST[256]; */ void md2_print_buf(size_t len, uint8_t buf[len]); +void md2_process_detailed_benchmark_step(enum md2_detailed_benchmark_step step); + +void md2_print_detailed_benchmark_result(); + #endif // MD2_COMMON_H \ No newline at end of file diff --git a/Implementierung/src/helper.c b/Implementierung/src/helper.c index 4ffe475..d6101a5 100644 --- a/Implementierung/src/helper.c +++ b/Implementierung/src/helper.c @@ -30,8 +30,13 @@ void help(char *progname) { 1: optimized implementation\n\ 2: memory efficient implementation\n\ 3: threaded implementation\n\ - 4: reference implementation\n\ - \n", + 4: reference implementation\n" +#ifdef MD2_DETAILED_BENCHMARK + "\n\ + \n\033[1;33mWARNING: Detailed benchmarking is enabled. This will show detailed benchmarking results\n\ + after each hash calculation but will decrease the overall performance.\033[0m" +#endif // MD2_DETAILED_BENCHMARK + "\n", progname); } @@ -103,7 +108,7 @@ enum argumentParseResult parseArguments(int argc, char **argv, return RESULT_OK; } -double current_time(void) { +double current_time() { struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); return t.tv_sec + t.tv_nsec * 1e-9; diff --git a/Implementierung/src/md2_impls/md2_0.c b/Implementierung/src/md2_impls/md2_0.c index 93b7dee..fa645b8 100644 --- a/Implementierung/src/md2_impls/md2_0.c +++ b/Implementierung/src/md2_impls/md2_0.c @@ -40,7 +40,9 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) { // printBuf(len + 16, newBuf); // === step 2 === + CHECKSUM_START_MARK md2_checksum_0(len, newBuf); + CHECKSUM_END_MARK // printf("buf with cecksum: "); // printBuf(len + 16, newBuf); @@ -51,28 +53,32 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) { return; } - // === step 4 === // <= because we need to hash the last block too for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) { + FIRST_LOOP_START_MARK for (int j = 0; j < 16; j++) {
messageDigestBuf[16 + j] = newBuf[i * 16 + j]; messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]); } + FIRST_LOOP_END_MARK u_int8_t t = 0; + SECOND_LOOP_START_MARK for (int j = 0; j < 18; j++) { for (int k = 0; k < 48; k++) { t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; } t = (t + j) % 256; } + SECOND_LOOP_END_MARK } // printf("messageDigestBuf: \n"); // printBuf(16, messageDigestBuf); + END_MARK memcpy(out, messageDigestBuf, 16); free(messageDigestBuf); diff --git a/Implementierung/src/md2_impls/md2_1.c b/Implementierung/src/md2_impls/md2_1.c index ac1d867..924282c 100644 --- a/Implementierung/src/md2_impls/md2_1.c +++ b/Implementierung/src/md2_impls/md2_1.c @@ -1,17 +1,14 @@ #include "../../lib/md2_impls/md2_1.h" -#include "../../lib/md2_impls/md2_common.h" #include +#include "../../lib/md2_impls/md2_common.h" -void md2_checksum_1(size_t len, uint8_t *buf) -{ +void md2_checksum_1(size_t len, uint8_t *buf) { uint8_t l = 0; - for (size_t i = 0; i < len / 16; i++) - { - for (int j = 0; j < 16; j++) - { + for (size_t i = 0; i < len / 16; i++) { + for (int j = 0; j < 16; j++) { u_int8_t c = buf[i * 16 + j]; // reference is wrong. It says: Set C[j] to S[c xor L]. 
But it should be: buf[len + j] ^= MD2_PI_SUBST[c ^ l]; @@ -39,18 +36,17 @@ static uint8_t PADDING[17][16] = { {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0}, {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}}; -void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) -{ +void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) { // === step 1 === int paddingNeeded = 16 - (len & 15); len += paddingNeeded; - uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16)); + uint8_t *newBuf = aligned_alloc(16, sizeof(uint8_t) * (len + 16)); if (newBuf == NULL) { return; } - for(size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) { newBuf[len + i] = 0; } @@ -59,7 +55,9 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded); // === step 2 === + CHECKSUM_START_MARK md2_checksum_1(len, newBuf); + CHECKSUM_END_MARK // === step 3 === uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48); @@ -74,27 +72,28 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) // === step 4 === __m128i vx; __m128i vy; - for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) - { - vx = _mm_load_si128((__m128i*) (newBuf + i * 16)); - _mm_store_si128((__m128i*) (messageDigestBuf + 16), vx); - vy = _mm_load_si128((__m128i*) (messageDigestBuf)); + for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) { + FIRST_LOOP_START_MARK + vx = _mm_load_si128((__m128i *)(newBuf + i * 16)); + _mm_store_si128((__m128i *)(messageDigestBuf + 16), vx); + vy = _mm_load_si128((__m128i *)(messageDigestBuf)); vy = _mm_xor_si128(vy, vx); - _mm_store_si128((__m128i*) (messageDigestBuf + 32), vy); - + _mm_store_si128((__m128i *)(messageDigestBuf + 32), vy); + FIRST_LOOP_END_MARK u_int8_t t = 0; - for (int j = 0; j < 18; j++) - { - for (int k = 0; k < 48; k++) - { + SECOND_LOOP_START_MARK + for (int j = 0; j < 18; j++) { + for (int k = 0; k < 
48; k++) { t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; } t = (t + j) & 255; } + SECOND_LOOP_END_MARK } + END_MARK memcpy(out, messageDigestBuf, 16); free(messageDigestBuf); diff --git a/Implementierung/src/md2_impls/md2_2.c b/Implementierung/src/md2_impls/md2_2.c index b165d05..be602f2 100644 --- a/Implementierung/src/md2_impls/md2_2.c +++ b/Implementierung/src/md2_impls/md2_2.c @@ -3,19 +3,23 @@ #include "../../lib/md2_impls/md2_common.h" void process_block_hash(uint8_t block[16], uint8_t messageDigestBuf[48]) { + FIRST_LOOP_START_MARK for (int j = 0; j < 16; j++) { messageDigestBuf[16 + j] = block[j]; messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]); } + FIRST_LOOP_END_MARK u_int8_t t = 0; + SECOND_LOOP_START_MARK for (int j = 0; j < 18; j++) { for (int k = 0; k < 48; k++) { t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; } t = (t + j) % 256; } + SECOND_LOOP_END_MARK } void process_block_checksum(uint8_t block[16], uint8_t checksum[16], @@ -74,21 +78,24 @@ void md2_hash_2(size_t len, const uint8_t buf[len], uint8_t out[16]) { return; } + CHECKSUM_START_MARK process_block_checksum(data, checksum, &l); + CHECKSUM_END_MARK process_block_hash(data, messageDigestBuf); bytes_left_to_read -= bytes_left_to_process; }; - fclose(file); - apply_padding(bytes_left_to_process % 16, data); process_block_checksum(data, checksum, &l); process_block_hash(data, messageDigestBuf); process_block_hash(checksum, messageDigestBuf); + + END_MARK memcpy(out, messageDigestBuf, 16); + fclose(file); free(data); free(messageDigestBuf); free(checksum); diff --git a/Implementierung/src/md2_impls/md2_3.c b/Implementierung/src/md2_impls/md2_3.c index 753de66..a8d13f3 100644 --- a/Implementierung/src/md2_impls/md2_3.c +++ b/Implementierung/src/md2_impls/md2_3.c @@ -10,20 +10,24 @@ struct thread_args { void process_nothread_hash(size_t len, const uint8_t buf[len], uint8_t messageDigestBuf[48]) { for (size_t i = 0; i < (len + 16) / 16 
- 1; i++) { + FIRST_LOOP_START_MARK for (int j = 0; j < 16; j++) { messageDigestBuf[16 + j] = buf[i * 16 + j]; messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]); } + FIRST_LOOP_END_MARK u_int8_t t = 0; + SECOND_LOOP_START_MARK for (int j = 0; j < 18; j++) { for (int k = 0; k < 48; k++) { t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; } t = (t + j) % 256; } + SECOND_LOOP_END_MARK } } @@ -48,7 +52,7 @@ void* process_checksum(void* threasdArgs) { } uint8_t l = 0; - + CHECKSUM_START_MARK for (size_t i = 0; i < args->len / 16; i++) { for (int j = 0; j < 16; j++) { u_int8_t c = args->buf[i * 16 + j]; @@ -56,6 +60,7 @@ void* process_checksum(void* threasdArgs) { l = checksum[j] ^= MD2_PI_SUBST[c ^ l]; } } + CHECKSUM_END_MARK pthread_exit(checksum); } @@ -107,6 +112,8 @@ void md2_hash_3(size_t len, const uint8_t buf[len], uint8_t out[16]) { } process_nothread_hash(16, checksum, messageDigestBuf); + + END_MARK memcpy(out, messageDigestBuf, 16); free(messageDigestBuf); diff --git a/Implementierung/src/md2_impls/md2_common.c b/Implementierung/src/md2_impls/md2_common.c index 1fa9cb6..963c4d4 100644 --- a/Implementierung/src/md2_impls/md2_common.c +++ b/Implementierung/src/md2_impls/md2_common.c @@ -26,4 +26,35 @@ void md2_print_buf(size_t len, uint8_t buf[len]) { printf("'%02x',", buf[i]); } printf("\n"); +} + +static double detailed_benchmark_timestamps[6] = {0, 0, 0, 0, 0, 0}; +static double detailed_benchmark_times[3] = {0, 0, 0}; + +void md2_process_detailed_benchmark_step( + enum md2_detailed_benchmark_step step) { + switch (step) { + case CHECKSUM_START: + case FIRST_LOOP_START: + case SECOND_LOOP_START: + detailed_benchmark_timestamps[step] = current_time(); + return; + + case CHECKSUM_END: + case FIRST_LOOP_END: + case SECOND_LOOP_END: + detailed_benchmark_timestamps[step] = current_time(); + detailed_benchmark_times[step / 2] += + detailed_benchmark_timestamps[step] - + detailed_benchmark_timestamps[step - 1]; + return; + } 
+} + +void md2_print_detailed_benchmark_result() { + printf("Detailed benchmarking results:\n"); + printf(" Checksum: %f\n", detailed_benchmark_times[0]); + printf(" First loop: %f\n", detailed_benchmark_times[1]); + printf(" Second loop: %f\n", detailed_benchmark_times[2]); + printf("\n"); } \ No newline at end of file