From 31e6ea44fecc67d50be615d5c7d9ecf04245fdeb Mon Sep 17 00:00:00 2001 From: Fabian Druschke Date: Fri, 20 Sep 2024 00:36:12 +0200 Subject: [PATCH] Added Philox AVX2 optimized PRNG --- src/Makefile.am | 4 +-- src/gui.c | 60 +++++++++++++++++++++++++++++++++++++++- src/options.c | 12 +++++++- src/philox/philox_prng.c | 59 +++++++++++++++++++++++++++++++++++++++ src/philox/philox_prng.h | 48 ++++++++++++++++++++++++++++++++ src/prng.c | 47 +++++++++++++++++++++++++++++++ src/prng.h | 8 ++++++ 7 files changed, 234 insertions(+), 4 deletions(-) create mode 100644 src/philox/philox_prng.c create mode 100644 src/philox/philox_prng.h diff --git a/src/Makefile.am b/src/Makefile.am index ac652c91..c3f9eaa7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,10 +1,10 @@ # what flags you want to pass to the C compiler & linker #CFLAGS = -lncurses -lparted -AM_CFLAGS = +AM_CFLAGS = -march=native -O3 -std=c11 AM_LDFLAGS = # this lists the binaries to produce, the (non-PHONY, binary) targets in # the previous manual Makefile bin_PROGRAMS = nwipe -nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c +nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c philox/philox_prng.h philox/philox_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c nwipe_LDADD = $(PARTED_LIBS) $(LIBCONFIG) diff --git a/src/gui.c b/src/gui.c index b8fa735b..cb6a268b 100644 --- a/src/gui.c +++ b/src/gui.c @@ -1616,11 +1616,12 @@ void nwipe_gui_prng( void ) extern nwipe_prng_t nwipe_aes_ctr_prng; extern nwipe_prng_t nwipe_xoroshiro256_prng; extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng; + extern nwipe_prng_t nwipe_philox_prng; extern int terminate_signal; /* The number of implemented PRNGs. */ - const int count = 5; + const int count = 6; /* The first tabstop. */ const int tab1 = 2; @@ -1662,6 +1663,10 @@ void nwipe_gui_prng( void ) { focus = 4; } + if( nwipe_options.prng == &nwipe_philox_prng ) + { + focus = 5; + } do { /* Clear the main window. */ @@ -1678,6 +1683,7 @@ void nwipe_gui_prng( void ) mvwprintw( main_window, yy++, tab1, " %s", nwipe_isaac64.label ); mvwprintw( main_window, yy++, tab1, " %s", nwipe_add_lagg_fibonacci_prng.label ); mvwprintw( main_window, yy++, tab1, " %s", nwipe_xoroshiro256_prng.label ); + mvwprintw( main_window, yy++, tab1, " %s", nwipe_philox_prng.label ); yy++; /* Print the cursor. */ @@ -1852,6 +1858,54 @@ void nwipe_gui_prng( void ) tab1, "especially for legacy systems, due to its efficiency and minimal demands. " ); break; + case 5: + + mvwprintw( main_window, + yy++, + tab1, + "Philox, originally developed by John Salmon, Mark Moraes, Ron O. Dror, and " ); + mvwprintw( main_window, + yy++, + tab1, + "David E. Shaw, is a counter-based PRNG designed for parallel pseudorandom " ); + mvwprintw( main_window, + yy++, + tab1, + "number generation. This implementation has been adapted by Fabian Druschke " ); + mvwprintw( main_window, + yy++, + tab1, + "to provide high-quality, fast pseudorandom numbers using AVX2 optimizations. " ); + mvwprintw( main_window, + yy++, + tab1, + "It generates 512-bit blocks, ensuring high performance and long periods of " ); + mvwprintw( main_window, + yy++, + tab1, + "2^256 or more, making it suitable for demanding applications. " ); + mvwprintw( main_window, + yy++, + tab1, + " " ); + mvwprintw( main_window, + yy++, + tab1, + "Philox uses simple arithmetic operations (multiplication, addition, and XOR), " ); + mvwprintw( main_window, + yy++, + tab1, + "making it both efficient and easy to parallelize. Fabian Druschke's adaptation " ); + mvwprintw( main_window, + yy++, + tab1, + "leverages modern hardware capabilities, providing excellent performance for " ); + mvwprintw( main_window, + yy++, + tab1, + "modern systems while maintaining reliability in various scenarios. " ); + break; + } /* switch */ @@ -1922,6 +1976,10 @@ void nwipe_gui_prng( void ) { nwipe_options.prng = &nwipe_xoroshiro256_prng; } + if( focus == 5 ) + { + nwipe_options.prng = &nwipe_philox_prng; + } return; case KEY_BACKSPACE: diff --git a/src/options.c b/src/options.c index c855d0e2..77cbf987 100644 --- a/src/options.c +++ b/src/options.c @@ -44,6 +44,7 @@ int nwipe_options_parse( int argc, char** argv ) extern nwipe_prng_t nwipe_isaac64; extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng; extern nwipe_prng_t nwipe_xoroshiro256_prng; + extern nwipe_prng_t nwipe_philox_prng; /* The getopt() result holder. */ int nwipe_opt; @@ -503,6 +504,11 @@ int nwipe_options_parse( int argc, char** argv ) nwipe_options.prng = &nwipe_xoroshiro256_prng; break; } + if( strcmp( optarg, "philox_prng" ) == 0 ) + { + nwipe_options.prng = &nwipe_philox_prng; + break; + } /* Else we do not know this PRNG. */ fprintf( stderr, "Error: Unknown prng '%s'.\n", optarg ); @@ -554,7 +560,7 @@ void nwipe_options_log( void ) extern nwipe_prng_t nwipe_isaac64; extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng; extern nwipe_prng_t nwipe_xoroshiro256_prng; - + extern nwipe_prng_t nwipe_philox_prng; /** * Prints a manifest of options to the log. */ @@ -617,6 +623,10 @@ void nwipe_options_log( void ) { nwipe_log( NWIPE_LOG_NOTICE, " prng = XORoshiro-256 (EXPERIMENTAL!)" ); } + if( nwipe_options.prng == &nwipe_philox_prng ) + { + nwipe_log( NWIPE_LOG_NOTICE, " prng = Philox (EXPERIMENTAL!)" ); + } else { if( nwipe_options.prng == &nwipe_isaac ) diff --git a/src/philox/philox_prng.c b/src/philox/philox_prng.c new file mode 100644 index 00000000..21e93d53 --- /dev/null +++ b/src/philox/philox_prng.c @@ -0,0 +1,59 @@ +#include // Für AVX2-Instruktionen +#include +#include +#include "philox_prng.h" + + + +// Funktion zur Initialisierung des PRNG-Zustands mit einem Schlüssel +void philox_prng_init(philox_state_t* state, unsigned long init_key[], unsigned long key_length) { + assert(state != NULL && init_key != NULL); + + // Initialisiere den Zähler mit Null + state->counter = _mm256_setzero_si256(); + + // Initialisiere den Schlüssel. Da wir AVX2 verwenden, passen maximal vier 64-Bit-Schlüssel in einen __m256i. + // Fülle den Schlüsselbereich auf, falls key_length < 4 ist. + if (key_length >= 4) { + state->key = _mm256_set_epi64x(init_key[3], init_key[2], init_key[1], init_key[0]); + } else { + unsigned long temp_key[4] = {0, 0, 0, 0}; // Fülle mit Nullen auf + for (unsigned long i = 0; i < key_length; ++i) { + temp_key[i] = init_key[i]; + } + state->key = _mm256_set_epi64x(temp_key[3], temp_key[2], temp_key[1], temp_key[0]); + } +} + +// Funktion zur Erzeugung von 512 Bit Zufallszahlen und Kopieren in einen Puffer +void philox_prng_genrand_uint512_to_buf(philox_state_t* state, unsigned char* bufpos) { + assert(state != NULL && bufpos != NULL); // Validiere Eingaben + + // Temporärer Puffer für 512 Bit (64 Bytes) + unsigned char temp_buffer[64]; + memset(temp_buffer, 0, sizeof(temp_buffer)); // Initialisiere temporären Puffer mit Nullen + + // Philox-Kernberechnung: + __m256i multiplier = _mm256_set1_epi64x(0xD2B74407B1CE6E93); // Philox-Konstante für Multiplikation + __m256i increment = _mm256_set1_epi64x(0x9E3779B97F4A7C15); // Philox-Konstante für Inkrementierung + + // Hauptschleife, die den Zähler und Schlüssel transformiert (Philox-Kern, z.B. 10 Runden) + for (int i = 0; i < 10; ++i) { + // Untere 32 Bit multiplizieren und obere 32 Bit für weitere Operationen nutzen + __m256i lo = _mm256_mul_epu32(state->counter, multiplier); // Untere 32 Bit multiplizieren + __m256i hi = _mm256_srli_epi64(state->counter, 32); // Höhere 32 Bit verschieben + + // Schlüssel und Zähler modifizieren + state->counter = _mm256_add_epi64(state->counter, increment); + state->key = _mm256_xor_si256(state->key, hi); // XOR mit den oberen Bits des Zählers + } + + // Die Ergebnisse in den temporären Puffer kopieren + _mm256_storeu_si256((__m256i*)temp_buffer, state->counter); // Speichere die ersten 256 Bit + _mm256_storeu_si256((__m256i*)(temp_buffer + 32), state->key); // Speichere die zweiten 256 Bit + + // Kopiere die generierten Pseudozufallsdaten in den Zielpuffer + memcpy(bufpos, temp_buffer, sizeof(temp_buffer)); +} + + diff --git a/src/philox/philox_prng.h b/src/philox/philox_prng.h new file mode 100644 index 00000000..9b2aeb59 --- /dev/null +++ b/src/philox/philox_prng.h @@ -0,0 +1,48 @@ +/* + * philox_prng.h + * Header-Datei für die optimierte Philox PRNG Implementierung mit AVX2 + * Autor: [Ihr Name] + * Datum: [Aktuelles Datum] + * + * Dieses Werk ist gemeinfrei. Es kann von jedermann für beliebige Zwecke + * genutzt werden, ohne jegliche Bedingungen, es sei denn, solche Bedingungen + * sind gesetzlich vorgeschrieben. + */ + +#ifndef PHILOX_PRNG_H +#define PHILOX_PRNG_H + +#include +#include // Für AVX2-Intrinsics + +#ifdef __cplusplus +extern "C" { +#endif + +// Anzahl der Runden für den Philox 4x32 Algorithmus +#define NUM_ROUNDS 10 // Anzahl der Runden für Philox 4x32 + + +typedef struct philox_state_s { + __m256i counter; // Verwende 256-Bit AVX2 Register für den Zähler + __m256i key; // Verwende 256-Bit AVX2 Register für den Schlüssel +} philox_state_t; + +/* Initialisiert den Philox PRNG Zustand. + - state: Zeiger auf den Philox PRNG Zustandsstruktur. + - init_key: Array, das den Seed-Schlüssel enthält. + - key_length: Länge des Schlüsselarrays. */ +void philox_prng_init(philox_state_t* state, unsigned long init_key[], unsigned long key_length); + +/* Generiert Pseudorandom-Zahlen und schreibt sie in einen Puffer. + - state: Zeiger auf den initialisierten Philox PRNG Zustand. + - bufpos: Zielpuffer, in den die Pseudorandom-Zahlen geschrieben werden. + Der Puffer sollte mindestens 64 Bytes groß sein. */ +void philox_prng_genrand_uint512_to_buf(philox_state_t* state, unsigned char* bufpos); + +#ifdef __cplusplus +} +#endif + +#endif // PHILOX_PRNG_H + diff --git a/src/prng.c b/src/prng.c index abf1b6cc..1e98e834 100644 --- a/src/prng.c +++ b/src/prng.c @@ -27,6 +27,7 @@ #include "isaac_rand/isaac64.h" #include "alfg/add_lagg_fibonacci_prng.h" //Lagged Fibonacci generator prototype #include "xor/xoroshiro256_prng.h" //XORoshiro-256 prototype +#include "philox/philox_prng.h" //Philox prototype nwipe_prng_t nwipe_twister = { "Mersenne Twister (mt19937ar-cok)", nwipe_twister_init, nwipe_twister_read }; @@ -40,6 +41,9 @@ nwipe_prng_t nwipe_add_lagg_fibonacci_prng = { "Lagged Fibonacci generator", /* XOROSHIRO-256 PRNG Structure */ nwipe_prng_t nwipe_xoroshiro256_prng = { "XORoshiro-256", nwipe_xoroshiro256_prng_init, nwipe_xoroshiro256_prng_read }; +/* Philox PRNG Structure */ +nwipe_prng_t nwipe_philox_prng = { "Philox", nwipe_philox_prng_init, nwipe_philox_prng_read }; + /* Print given number of bytes from unsigned integer number to a byte stream buffer starting with low-endian. */ static inline void u32_to_buffer( u8* restrict buffer, u32 val, const int len ) { @@ -340,3 +344,46 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE ) return 0; // Success } + +/* EXPERIMENTAL implementation of Philox algorithm to provide high-quality, but a lot of random numbers */ +int nwipe_philox_prng_init( NWIPE_PRNG_INIT_SIGNATURE ) +{ + nwipe_log( NWIPE_LOG_NOTICE, "Initialising Philox PRNG" ); + + if( *state == NULL ) + { + /* This is the first time that we have been called. */ + *state = malloc( sizeof( philox_state_t ) ); + } + philox_prng_init( (philox_state_t*) *state, (uint64_t*) ( seed->s ), seed->length / sizeof( uint64_t ) ); + + return 0; +} + + +int nwipe_philox_prng_read( NWIPE_PRNG_READ_SIGNATURE ) +{ + u8* restrict bufpos = buffer; + size_t words = count / SIZE_OF_PHILOX_PRNG; + + /* Loop to fill the buffer with blocks directly from the XORoroshiro256 algorithm */ + for( size_t ii = 0; ii < words; ++ii ) + { + philox_prng_genrand_uint512_to_buf( (philox_state_t*) *state, bufpos ); + bufpos += SIZE_OF_PHILOX_PRNG; // Move to the next block + } + + /* Handle remaining bytes if count is not a multiple of SIZE_OF_PHILOX_PRNG */ + const size_t remain = count % SIZE_OF_PHILOX_PRNG; + if( remain > 0 ) + { + unsigned char temp_output[SIZE_OF_PHILOX_PRNG]; // Temporary buffer for the last block + philox_prng_genrand_uint512_to_buf( (philox_state_t*) *state, temp_output ); + + // Copy the remaining bytes + memcpy( bufpos, temp_output, remain ); + } + + return 0; // Success +} + diff --git a/src/prng.h b/src/prng.h index a9add099..5193ac0a 100644 --- a/src/prng.h +++ b/src/prng.h @@ -63,6 +63,10 @@ int nwipe_add_lagg_fibonacci_prng_read( NWIPE_PRNG_READ_SIGNATURE ); int nwipe_xoroshiro256_prng_init( NWIPE_PRNG_INIT_SIGNATURE ); int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE ); +/* Philox prototypes. */ +int nwipe_philox_prng_init( NWIPE_PRNG_INIT_SIGNATURE ); +int nwipe_philox_prng_read( NWIPE_PRNG_READ_SIGNATURE ); + /* Size of the twister is not derived from the architecture, but it is strictly 4 bytes */ #define SIZE_OF_TWISTER 4 @@ -76,4 +80,8 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE ); /* Size of the XOROSHIRO-256 is not derived from the architecture, but it is strictly 32 bytes */ #define SIZE_OF_XOROSHIRO256_PRNG 32 +/* Size of the Philox is not derived from the architecture, but it is strictly 64 bytes */ +#define SIZE_OF_PHILOX_PRNG 64 + + #endif /* PRNG_H_ */