Merge branch 'master' of gitlab-gepasp.in.tum.de:gra22s/team199

This commit is contained in:
Dorian Zedler 2022-07-20 22:27:37 +02:00
commit 36197ff9b1
10 changed files with 140 additions and 33 deletions

5
.vscode/tasks.json vendored
View file

@ -9,8 +9,11 @@
"-g", "-g",
"${fileDirname}/*.c", "${fileDirname}/*.c",
"${fileDirname}/md2_impls/*.c", "${fileDirname}/md2_impls/*.c",
"${fileDirname}/md2_impls/md2_reference/*.c",
"-o", "-o",
"${fileDirname}/../md2" "${fileDirname}/../md2",
"-D",
"MD2_DETAILED_BENCHMARK"
], ],
"options": { "options": {
"cwd": "${fileDirname}" "cwd": "${fileDirname}"

View file

@ -3,7 +3,11 @@
SRC = src/main.c src/helper.c src/io.c src/md2.c src/md2_impls/md2_common.c src/md2_impls/md2_0.c src/md2_impls/md2_1.c src/md2_impls/md2_2.c src/md2_impls/md2_3.c src/md2_impls/md2_reference/md2_reference.c SRC = src/main.c src/helper.c src/io.c src/md2.c src/md2_impls/md2_common.c src/md2_impls/md2_0.c src/md2_impls/md2_1.c src/md2_impls/md2_2.c src/md2_impls/md2_3.c src/md2_impls/md2_reference/md2_reference.c
OBJ = ${subst src,build,${SRC:.c=.o}} OBJ = ${subst src,build,${SRC:.c=.o}}
CC = gcc CC = gcc
CFLAGS = -Ilib -ggdb -std=c11 -g -Wall -Wextra -no-pie -O3 DETAILED_BENCHMARK ?= 0
# Compiler flags used for every object file.
CFLAGS = -Ilib -std=c11 -Wall -Wextra -O3
# Opt-in instrumentation: only the exact value `true` (e.g.
# `make DETAILED_BENCHMARK=true`) adds -DMD2_DETAILED_BENCHMARK to the flags.
ifeq ($(DETAILED_BENCHMARK), true)
CFLAGS += -DMD2_DETAILED_BENCHMARK
endif
LDFLAGS = -pthread LDFLAGS = -pthread
#TESTFILES = t/1 t/2 t/5 t/7 t/10 t/25 t/50 t/75 t/100 t/250 t/500 t/750 t/1000 t/1500 t/2000 t/2500 t/3000 t/3500 t/4000 t/4500 t/5000 t/5500 #TESTFILES = t/1 t/2 t/5 t/7 t/10 t/25 t/50 t/75 t/100 t/250 t/500 t/750 t/1000 t/1500 t/2000 t/2500 t/3000 t/3500 t/4000 t/4500 t/5000 t/5500
TESTFILES = t/6000 t/6500 t/7000 t/7500 t/8000 t/8500 t/9000 t/9500 t/10000 TESTFILES = t/6000 t/6500 t/7000 t/7500 t/8000 t/8500 t/9000 t/9500 t/10000
@ -23,9 +27,12 @@ help:
@echo @echo
@echo Available targets: @echo Available targets:
@echo - all: build everything @echo - all: build everything
@echo " When DETAILED_BENCHMARK=true is passed, the detailed benchmarks are enabled."
@echo " This will decrease the overall performance of the program."
@echo - clean: clean distfiles @echo - clean: clean distfiles
@echo - help: show this help @echo - help: show this help
@echo - benchmarks: run benchmarks (only works on linux!) @echo - benchmarks: run benchmarks
@echo - valgrind-check: run checks with valgrind
build/%.o: src/%.c build/%.o: src/%.c
@mkdir -p build/md2_impls/md2_reference @mkdir -p build/md2_impls/md2_reference
@ -73,7 +80,7 @@ benchmarks.csv: md2 ${TESTFILES}
benchmarks: benchmarks.csv benchmarks: benchmarks.csv
lint: md2 t/3 valgrind-check: md2 t/3
@for i in 0 1 2 3 4; do \ @for i in 0 1 2 3 4; do \
echo; \ echo; \

View file

@ -36,6 +36,8 @@ struct configuration {
enum argumentParseResult parseArguments(int argc, char** argv, enum argumentParseResult parseArguments(int argc, char** argv,
struct configuration* c); struct configuration* c);
/**
 * @brief Read the monotonic clock
 *
 * Declared with an explicit (void) parameter list so the compiler treats this
 * as a real prototype and rejects calls that pass arguments.
 *
 * @return the CLOCK_MONOTONIC reading in seconds (whole seconds plus the
 *         nanosecond part scaled by 1e-9)
 */
double current_time(void);
/** /**
* @brief Run an md2_hash_func with benchmark timing * @brief Run an md2_hash_func with benchmark timing
* *

View file

@ -9,6 +9,42 @@
#include <string.h> #include <string.h>
#include <sys/types.h> #include <sys/types.h>
#include "../helper.h"
/*
 * Detailed-benchmark instrumentation hooks.
 *
 * With MD2_DETAILED_BENCHMARK defined, the two *_if_defined macros expand to
 * a call of the matching benchmark helper INCLUDING the trailing semicolon;
 * without it they expand to nothing. Because the semicolon is part of the
 * expansion, every *_MARK macro below is used as a bare statement with no
 * semicolon of its own.
 */
#if defined(MD2_DETAILED_BENCHMARK)
#define md2_process_detailed_benchmark_step_if_defined(step) \
  md2_process_detailed_benchmark_step(step);
#define md2_print_detailed_benchmark_result_if_defined \
  md2_print_detailed_benchmark_result();
#else /* !MD2_DETAILED_BENCHMARK: all hooks compile away */
#define md2_process_detailed_benchmark_step_if_defined(step)
#define md2_print_detailed_benchmark_result_if_defined
#endif /* MD2_DETAILED_BENCHMARK */

/* Markers bracketing the checksum computation. */
#define CHECKSUM_START_MARK md2_process_detailed_benchmark_step_if_defined(CHECKSUM_START)
#define CHECKSUM_END_MARK md2_process_detailed_benchmark_step_if_defined(CHECKSUM_END)

/* Markers bracketing the first (block copy / XOR) loop of the digest step. */
#define FIRST_LOOP_START_MARK md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_START)
#define FIRST_LOOP_END_MARK md2_process_detailed_benchmark_step_if_defined(FIRST_LOOP_END)

/* Markers bracketing the second (18-round substitution) loop of the digest step. */
#define SECOND_LOOP_START_MARK md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_START)
#define SECOND_LOOP_END_MARK md2_process_detailed_benchmark_step_if_defined(SECOND_LOOP_END)

/* Marker placed once a hash is finished: prints the collected timings. */
#define END_MARK md2_print_detailed_benchmark_result_if_defined
/**
 * @brief Measurement points reported to md2_process_detailed_benchmark_step
 *
 * The numeric layout is significant: every *_START value is even and its
 * matching *_END value is the next odd number, so for an END value
 * `step - 1` is the paired START and `step / 2` is the index of the measured
 * section (0 = checksum, 1 = first loop, 2 = second loop).
 */
enum md2_detailed_benchmark_step {
  CHECKSUM_START = 0,    /* section 0 begins */
  CHECKSUM_END = 1,      /* section 0 ends */
  FIRST_LOOP_START = 2,  /* section 1 begins */
  FIRST_LOOP_END = 3,    /* section 1 ends */
  SECOND_LOOP_START = 4, /* section 2 begins */
  SECOND_LOOP_END = 5    /* section 2 ends */
};
/** /**
* @brief Some digits of pi * @brief Some digits of pi
* *
@ -23,4 +59,8 @@ extern unsigned char MD2_PI_SUBST[256];
*/ */
void md2_print_buf(size_t len, uint8_t buf[len]); void md2_print_buf(size_t len, uint8_t buf[len]);
/**
 * @brief Record a timestamp for one detailed-benchmark measurement point
 *
 * For an *_END step the time elapsed since the matching *_START step is
 * added to that section's running total.
 *
 * @param step the measurement point that was just reached
 */
void md2_process_detailed_benchmark_step(enum md2_detailed_benchmark_step step);

/**
 * @brief Print the accumulated detailed-benchmark totals to stdout
 *
 * Declared with (void) so the declaration is a real prototype.
 */
void md2_print_detailed_benchmark_result(void);
#endif // MD2_COMMON_H #endif // MD2_COMMON_H

View file

@ -30,8 +30,13 @@ void help(char *progname) {
1: optimized implementation\n\ 1: optimized implementation\n\
2: memory efficient implementation\n\ 2: memory efficient implementation\n\
3: threaded implementation\n\ 3: threaded implementation\n\
4: reference implementation\n\ 4: reference implementation\n"
\n", #ifdef MD2_DETAILED_BENCHMARK
"\n\
\n\033[1;33mWARNING: Detailed benchmarking is enabled. This will show detailed benchmaring results\n\
after each hash calculation but will decrease the overall performance.\033[0m"
#endif // MD2_DETAILED_BENCHMARK
"\n",
progname); progname);
} }
@ -103,7 +108,7 @@ enum argumentParseResult parseArguments(int argc, char **argv,
return RESULT_OK; return RESULT_OK;
} }
double current_time(void) { double current_time() {
struct timespec t; struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t); clock_gettime(CLOCK_MONOTONIC, &t);
return t.tv_sec + t.tv_nsec * 1e-9; return t.tv_sec + t.tv_nsec * 1e-9;

View file

@ -40,7 +40,9 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) {
// printBuf(len + 16, newBuf); // printBuf(len + 16, newBuf);
// === step 2 === // === step 2 ===
CHECKSUM_START_MARK
md2_checksum_0(len, newBuf); md2_checksum_0(len, newBuf);
CHECKSUM_END_MARK
// printf("buf with cecksum: "); // printf("buf with cecksum: ");
// printBuf(len + 16, newBuf); // printBuf(len + 16, newBuf);
@ -51,28 +53,32 @@ void md2_hash_0(size_t len, const uint8_t buf[len], uint8_t out[16]) {
return; return;
} }
// === step 4 === // === step 4 ===
// <= because we need to hash the last block too // <= because we need to hash the last block too
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) { for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
FIRST_LOOP_START_MARK
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
messageDigestBuf[16 + j] = newBuf[i * 16 + j]; messageDigestBuf[16 + j] = newBuf[i * 16 + j];
messageDigestBuf[32 + j] = messageDigestBuf[32 + j] =
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]); (messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
} }
FIRST_LOOP_END_MARK
u_int8_t t = 0; u_int8_t t = 0;
SECOND_LOOP_START_MARK
for (int j = 0; j < 18; j++) { for (int j = 0; j < 18; j++) {
for (int k = 0; k < 48; k++) { for (int k = 0; k < 48; k++) {
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
} }
t = (t + j) % 256; t = (t + j) % 256;
} }
SECOND_LOOP_END_MARK
} }
// printf("messageDigestBuf: \n"); // printf("messageDigestBuf: \n");
// printBuf(16, messageDigestBuf); // printBuf(16, messageDigestBuf);
END_MARK
memcpy(out, messageDigestBuf, 16); memcpy(out, messageDigestBuf, 16);
free(messageDigestBuf); free(messageDigestBuf);

View file

@ -1,17 +1,14 @@
#include "../../lib/md2_impls/md2_1.h" #include "../../lib/md2_impls/md2_1.h"
#include "../../lib/md2_impls/md2_common.h"
#include <immintrin.h> #include <immintrin.h>
#include "../../lib/md2_impls/md2_common.h"
void md2_checksum_1(size_t len, uint8_t *buf) void md2_checksum_1(size_t len, uint8_t *buf) {
{
uint8_t l = 0; uint8_t l = 0;
for (size_t i = 0; i < len / 16; i++) for (size_t i = 0; i < len / 16; i++) {
{ for (int j = 0; j < 16; j++) {
for (int j = 0; j < 16; j++)
{
u_int8_t c = buf[i * 16 + j]; u_int8_t c = buf[i * 16 + j];
// reference is wrong. It says: Set C[j] to S[c xor L]. But it should be: // reference is wrong. It says: Set C[j] to S[c xor L]. But it should be:
buf[len + j] ^= MD2_PI_SUBST[c ^ l]; buf[len + j] ^= MD2_PI_SUBST[c ^ l];
@ -39,18 +36,17 @@ static uint8_t PADDING[17][16] = {
{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0}, {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0},
{16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}}; {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}};
void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16]) {
{
// === step 1 === // === step 1 ===
int paddingNeeded = 16 - (len & 15); int paddingNeeded = 16 - (len & 15);
len += paddingNeeded; len += paddingNeeded;
uint8_t* newBuf = aligned_alloc(16, sizeof(uint8_t)*(len + 16)); uint8_t *newBuf = aligned_alloc(16, sizeof(uint8_t) * (len + 16));
if (newBuf == NULL) { if (newBuf == NULL) {
return; return;
} }
for(size_t i = 0; i < 16; i++) { for (size_t i = 0; i < 16; i++) {
newBuf[len + i] = 0; newBuf[len + i] = 0;
} }
@ -59,7 +55,9 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded); memcpy(newBuf + len - paddingNeeded, PADDING + paddingNeeded, paddingNeeded);
// === step 2 === // === step 2 ===
CHECKSUM_START_MARK
md2_checksum_1(len, newBuf); md2_checksum_1(len, newBuf);
CHECKSUM_END_MARK
// === step 3 === // === step 3 ===
uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48); uint8_t *messageDigestBuf = aligned_alloc(16, sizeof(uint8_t) * 48);
@ -74,27 +72,28 @@ void md2_hash_1(size_t len, const uint8_t buf[len], uint8_t out[16])
// === step 4 === // === step 4 ===
__m128i vx; __m128i vx;
__m128i vy; __m128i vy;
for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) for (size_t i = 0; i <= (len + 16) / 16 - 1; i++) {
{ FIRST_LOOP_START_MARK
vx = _mm_load_si128((__m128i*) (newBuf + i * 16)); vx = _mm_load_si128((__m128i *)(newBuf + i * 16));
_mm_store_si128((__m128i*) (messageDigestBuf + 16), vx); _mm_store_si128((__m128i *)(messageDigestBuf + 16), vx);
vy = _mm_load_si128((__m128i*) (messageDigestBuf)); vy = _mm_load_si128((__m128i *)(messageDigestBuf));
vy = _mm_xor_si128(vy, vx); vy = _mm_xor_si128(vy, vx);
_mm_store_si128((__m128i*) (messageDigestBuf + 32), vy); _mm_store_si128((__m128i *)(messageDigestBuf + 32), vy);
FIRST_LOOP_END_MARK
u_int8_t t = 0; u_int8_t t = 0;
for (int j = 0; j < 18; j++) SECOND_LOOP_START_MARK
{ for (int j = 0; j < 18; j++) {
for (int k = 0; k < 48; k++) for (int k = 0; k < 48; k++) {
{
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
} }
t = (t + j) & 255; t = (t + j) & 255;
} }
SECOND_LOOP_END_MARK
} }
END_MARK
memcpy(out, messageDigestBuf, 16); memcpy(out, messageDigestBuf, 16);
free(messageDigestBuf); free(messageDigestBuf);

View file

@ -3,19 +3,23 @@
#include "../../lib/md2_impls/md2_common.h" #include "../../lib/md2_impls/md2_common.h"
/**
 * @brief Mix one 16-byte block into the 48-byte MD2 digest state
 *
 * @param block the 16 bytes to absorb
 * @param messageDigestBuf 48-byte working state, updated in place
 */
void process_block_hash(uint8_t block[16], uint8_t messageDigestBuf[48]) { void process_block_hash(uint8_t block[16], uint8_t messageDigestBuf[48]) {
FIRST_LOOP_START_MARK
// Bytes 16..31 receive the block; bytes 32..47 receive block XOR bytes 0..15.
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
messageDigestBuf[16 + j] = block[j]; messageDigestBuf[16 + j] = block[j];
messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]); messageDigestBuf[32 + j] = (messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
} }
FIRST_LOOP_END_MARK
u_int8_t t = 0; u_int8_t t = 0;
SECOND_LOOP_START_MARK
// 18 rounds: each pass XORs every state byte with MD2_PI_SUBST[t], where t is
// the previously written byte; after each round t is advanced by the round number.
for (int j = 0; j < 18; j++) { for (int j = 0; j < 18; j++) {
for (int k = 0; k < 48; k++) { for (int k = 0; k < 48; k++) {
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
} }
t = (t + j) % 256; t = (t + j) % 256;
} }
SECOND_LOOP_END_MARK
} }
void process_block_checksum(uint8_t block[16], uint8_t checksum[16], void process_block_checksum(uint8_t block[16], uint8_t checksum[16],
@ -74,21 +78,24 @@ void md2_hash_2(size_t len, const uint8_t buf[len], uint8_t out[16]) {
return; return;
} }
CHECKSUM_START_MARK
process_block_checksum(data, checksum, &l); process_block_checksum(data, checksum, &l);
CHECKSUM_END_MARK
process_block_hash(data, messageDigestBuf); process_block_hash(data, messageDigestBuf);
bytes_left_to_read -= bytes_left_to_process; bytes_left_to_read -= bytes_left_to_process;
}; };
fclose(file);
apply_padding(bytes_left_to_process % 16, data); apply_padding(bytes_left_to_process % 16, data);
process_block_checksum(data, checksum, &l); process_block_checksum(data, checksum, &l);
process_block_hash(data, messageDigestBuf); process_block_hash(data, messageDigestBuf);
process_block_hash(checksum, messageDigestBuf); process_block_hash(checksum, messageDigestBuf);
END_MARK
memcpy(out, messageDigestBuf, 16); memcpy(out, messageDigestBuf, 16);
fclose(file);
free(data); free(data);
free(messageDigestBuf); free(messageDigestBuf);
free(checksum); free(checksum);

View file

@ -10,20 +10,24 @@ struct thread_args {
/**
 * @brief Mix consecutive 16-byte blocks of buf into the 48-byte digest state
 *
 * The loop runs (len + 16) / 16 - 1 times, reading block i from buf + i * 16.
 *
 * @param len number of bytes in buf
 * @param buf input bytes, consumed in 16-byte blocks
 * @param messageDigestBuf 48-byte working state, updated in place
 */
void process_nothread_hash(size_t len, const uint8_t buf[len], void process_nothread_hash(size_t len, const uint8_t buf[len],
uint8_t messageDigestBuf[48]) { uint8_t messageDigestBuf[48]) {
for (size_t i = 0; i < (len + 16) / 16 - 1; i++) { for (size_t i = 0; i < (len + 16) / 16 - 1; i++) {
FIRST_LOOP_START_MARK
// Bytes 16..31 receive block i; bytes 32..47 receive block XOR bytes 0..15.
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
messageDigestBuf[16 + j] = buf[i * 16 + j]; messageDigestBuf[16 + j] = buf[i * 16 + j];
messageDigestBuf[32 + j] = messageDigestBuf[32 + j] =
(messageDigestBuf[16 + j] ^ messageDigestBuf[j]); (messageDigestBuf[16 + j] ^ messageDigestBuf[j]);
} }
FIRST_LOOP_END_MARK
u_int8_t t = 0; u_int8_t t = 0;
SECOND_LOOP_START_MARK
// 18 substitution rounds over all 48 state bytes, chained through t.
for (int j = 0; j < 18; j++) { for (int j = 0; j < 18; j++) {
for (int k = 0; k < 48; k++) { for (int k = 0; k < 48; k++) {
t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t]; t = messageDigestBuf[k] = messageDigestBuf[k] ^ MD2_PI_SUBST[t];
} }
t = (t + j) % 256; t = (t + j) % 256;
} }
SECOND_LOOP_END_MARK
} }
} }
@ -48,7 +52,7 @@ void* process_checksum(void* threasdArgs) {
} }
uint8_t l = 0; uint8_t l = 0;
CHECKSUM_START_MARK
for (size_t i = 0; i < args->len / 16; i++) { for (size_t i = 0; i < args->len / 16; i++) {
for (int j = 0; j < 16; j++) { for (int j = 0; j < 16; j++) {
u_int8_t c = args->buf[i * 16 + j]; u_int8_t c = args->buf[i * 16 + j];
@ -56,6 +60,7 @@ void* process_checksum(void* threasdArgs) {
l = checksum[j] ^= MD2_PI_SUBST[c ^ l]; l = checksum[j] ^= MD2_PI_SUBST[c ^ l];
} }
} }
CHECKSUM_END_MARK
pthread_exit(checksum); pthread_exit(checksum);
} }
@ -107,6 +112,8 @@ void md2_hash_3(size_t len, const uint8_t buf[len], uint8_t out[16]) {
} }
process_nothread_hash(16, checksum, messageDigestBuf); process_nothread_hash(16, checksum, messageDigestBuf);
END_MARK
memcpy(out, messageDigestBuf, 16); memcpy(out, messageDigestBuf, 16);
free(messageDigestBuf); free(messageDigestBuf);

View file

@ -27,3 +27,34 @@ void md2_print_buf(size_t len, uint8_t buf[len]) {
} }
printf("\n"); printf("\n");
} }
/* Most recent clock reading per measurement point, indexed by step value. */
static double detailed_benchmark_timestamps[6] = {0};
/* Running total per measured section: [0] checksum, [1] first loop,
 * [2] second loop. */
static double detailed_benchmark_times[3] = {0};
/**
 * @brief Record the current time for one measurement point
 *
 * Every known step stores the current clock reading in its timestamp slot.
 * An *_END step (odd value) additionally adds the time elapsed since the
 * paired *_START step (the value directly below it) to the running total of
 * its section (step / 2). Values outside the enum range are ignored.
 *
 * @param step the measurement point that was just reached
 */
void md2_process_detailed_benchmark_step(
    enum md2_detailed_benchmark_step step) {
  const int slot = (int)step;
  if (slot < CHECKSUM_START || slot > SECOND_LOOP_END) {
    return;
  }

  const double now = current_time();
  detailed_benchmark_timestamps[slot] = now;

  const int is_end_marker = (slot % 2) != 0;
  if (is_end_marker) {
    const double started = detailed_benchmark_timestamps[slot - 1];
    detailed_benchmark_times[slot / 2] += now - started;
  }
}
/**
 * @brief Print the accumulated detailed-benchmark totals to stdout
 *
 * Writes a header line followed by the running totals of the checksum, first
 * loop and second loop sections, then a blank line. The totals are sums of
 * current_time() differences.
 *
 * The definition uses an explicit (void) parameter list so it provides a
 * prototype instead of an old-style empty declarator.
 *
 * NOTE(review): the totals are not cleared here, so each report appears to
 * include every hash computed since program start - confirm this is intended.
 */
void md2_print_detailed_benchmark_result(void) {
  printf("Detailed benchmarking results:\n");
  printf(" Checksum: %f\n", detailed_benchmark_times[0]);
  printf(" First loop: %f\n", detailed_benchmark_times[1]);
  printf(" Second loop: %f\n", detailed_benchmark_times[2]);
  printf("\n");
}