Skip to content

Commit

Permalink
Added Philox AVX2 optimized PRNG
Browse files Browse the repository at this point in the history
  • Loading branch information
Knogle committed Sep 19, 2024
1 parent f250aee commit 31e6ea4
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# what flags you want to pass to the C compiler & linker
#CFLAGS = -lncurses -lparted
AM_CFLAGS =
AM_CFLAGS = -march=native -O3 -std=c11
AM_LDFLAGS =

# this lists the binaries to produce, the (non-PHONY, binary) targets in
# the previous manual Makefile
bin_PROGRAMS = nwipe
nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c
nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c philox/philox_prng.h philox/philox_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c
nwipe_LDADD = $(PARTED_LIBS) $(LIBCONFIG)
60 changes: 59 additions & 1 deletion src/gui.c
Original file line number Diff line number Diff line change
Expand Up @@ -1616,11 +1616,12 @@ void nwipe_gui_prng( void )
extern nwipe_prng_t nwipe_aes_ctr_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_philox_prng;

extern int terminate_signal;

/* The number of implemented PRNGs. */
const int count = 5;
const int count = 6;

/* The first tabstop. */
const int tab1 = 2;
Expand Down Expand Up @@ -1662,6 +1663,10 @@ void nwipe_gui_prng( void )
{
focus = 4;
}
if( nwipe_options.prng == &nwipe_philox_prng )
{
focus = 5;
}
do
{
/* Clear the main window. */
Expand All @@ -1678,6 +1683,7 @@ void nwipe_gui_prng( void )
mvwprintw( main_window, yy++, tab1, " %s", nwipe_isaac64.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_add_lagg_fibonacci_prng.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_xoroshiro256_prng.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_philox_prng.label );
yy++;

/* Print the cursor. */
Expand Down Expand Up @@ -1852,6 +1858,54 @@ void nwipe_gui_prng( void )
tab1,
"especially for legacy systems, due to its efficiency and minimal demands. " );
break;
case 5:

mvwprintw( main_window,
yy++,
tab1,
"Philox, originally developed by John Salmon, Mark Moraes, Ron O. Dror, and " );
mvwprintw( main_window,
yy++,
tab1,
"David E. Shaw, is a counter-based PRNG designed for parallel pseudorandom " );
mvwprintw( main_window,
yy++,
tab1,
"number generation. This implementation has been adapted by Fabian Druschke " );
mvwprintw( main_window,
yy++,
tab1,
"to provide high-quality, fast pseudorandom numbers using AVX2 optimizations. " );
mvwprintw( main_window,
yy++,
tab1,
"It generates 512-bit blocks, ensuring high performance and long periods of " );
mvwprintw( main_window,
yy++,
tab1,
"2^256 or more, making it suitable for demanding applications. " );
mvwprintw( main_window,
yy++,
tab1,
" " );
mvwprintw( main_window,
yy++,
tab1,
"Philox uses simple arithmetic operations (multiplication, addition, and XOR), " );
mvwprintw( main_window,
yy++,
tab1,
"making it both efficient and easy to parallelize. Fabian Druschke's adaptation " );
mvwprintw( main_window,
yy++,
tab1,
"leverages modern hardware capabilities, providing excellent performance for " );
mvwprintw( main_window,
yy++,
tab1,
"modern systems while maintaining reliability in various scenarios. " );
break;

}

/* switch */
Expand Down Expand Up @@ -1922,6 +1976,10 @@ void nwipe_gui_prng( void )
{
nwipe_options.prng = &nwipe_xoroshiro256_prng;
}
if( focus == 5 )
{
nwipe_options.prng = &nwipe_philox_prng;
}
return;

case KEY_BACKSPACE:
Expand Down
12 changes: 11 additions & 1 deletion src/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ int nwipe_options_parse( int argc, char** argv )
extern nwipe_prng_t nwipe_isaac64;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;
extern nwipe_prng_t nwipe_philox_prng;

/* The getopt() result holder. */
int nwipe_opt;
Expand Down Expand Up @@ -503,6 +504,11 @@ int nwipe_options_parse( int argc, char** argv )
nwipe_options.prng = &nwipe_xoroshiro256_prng;
break;
}
if( strcmp( optarg, "philox_prng" ) == 0 )
{
nwipe_options.prng = &nwipe_philox_prng;
break;
}

/* Else we do not know this PRNG. */
fprintf( stderr, "Error: Unknown prng '%s'.\n", optarg );
Expand Down Expand Up @@ -554,7 +560,7 @@ void nwipe_options_log( void )
extern nwipe_prng_t nwipe_isaac64;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;

extern nwipe_prng_t nwipe_philox_prng;
/**
* Prints a manifest of options to the log.
*/
Expand Down Expand Up @@ -617,6 +623,10 @@ void nwipe_options_log( void )
{
nwipe_log( NWIPE_LOG_NOTICE, " prng = XORoshiro-256 (EXPERIMENTAL!)" );
}
if( nwipe_options.prng == &nwipe_philox_prng )
{
nwipe_log( NWIPE_LOG_NOTICE, " prng = Philox (EXPERIMENTAL!)" );
}
else
{
if( nwipe_options.prng == &nwipe_isaac )
Expand Down
59 changes: 59 additions & 0 deletions src/philox/philox_prng.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <immintrin.h> // Für AVX2-Instruktionen
#include <assert.h>
#include <string.h>
#include "philox_prng.h"



// Funktion zur Initialisierung des PRNG-Zustands mit einem Schlüssel
void philox_prng_init(philox_state_t* state, unsigned long init_key[], unsigned long key_length) {
assert(state != NULL && init_key != NULL);

// Initialisiere den Zähler mit Null
state->counter = _mm256_setzero_si256();

// Initialisiere den Schlüssel. Da wir AVX2 verwenden, passen maximal vier 64-Bit-Schlüssel in einen __m256i.
// Fülle den Schlüsselbereich auf, falls key_length < 4 ist.
if (key_length >= 4) {
state->key = _mm256_set_epi64x(init_key[3], init_key[2], init_key[1], init_key[0]);
} else {
unsigned long temp_key[4] = {0, 0, 0, 0}; // Fülle mit Nullen auf
for (unsigned long i = 0; i < key_length; ++i) {
temp_key[i] = init_key[i];
}
state->key = _mm256_set_epi64x(temp_key[3], temp_key[2], temp_key[1], temp_key[0]);
}
}

// Funktion zur Erzeugung von 512 Bit Zufallszahlen und Kopieren in einen Puffer
void philox_prng_genrand_uint512_to_buf(philox_state_t* state, unsigned char* bufpos) {
assert(state != NULL && bufpos != NULL); // Validiere Eingaben

// Temporärer Puffer für 512 Bit (64 Bytes)
unsigned char temp_buffer[64];
memset(temp_buffer, 0, sizeof(temp_buffer)); // Initialisiere temporären Puffer mit Nullen

// Philox-Kernberechnung:
__m256i multiplier = _mm256_set1_epi64x(0xD2B74407B1CE6E93); // Philox-Konstante für Multiplikation
__m256i increment = _mm256_set1_epi64x(0x9E3779B97F4A7C15); // Philox-Konstante für Inkrementierung

// Hauptschleife, die den Zähler und Schlüssel transformiert (Philox-Kern, z.B. 10 Runden)
for (int i = 0; i < 10; ++i) {
// Untere 32 Bit multiplizieren und obere 32 Bit für weitere Operationen nutzen
__m256i lo = _mm256_mul_epu32(state->counter, multiplier); // Untere 32 Bit multiplizieren
__m256i hi = _mm256_srli_epi64(state->counter, 32); // Höhere 32 Bit verschieben

// Schlüssel und Zähler modifizieren
state->counter = _mm256_add_epi64(state->counter, increment);
state->key = _mm256_xor_si256(state->key, hi); // XOR mit den oberen Bits des Zählers
}

// Die Ergebnisse in den temporären Puffer kopieren
_mm256_storeu_si256((__m256i*)temp_buffer, state->counter); // Speichere die ersten 256 Bit
_mm256_storeu_si256((__m256i*)(temp_buffer + 32), state->key); // Speichere die zweiten 256 Bit

// Kopiere die generierten Pseudozufallsdaten in den Zielpuffer
memcpy(bufpos, temp_buffer, sizeof(temp_buffer));
}


48 changes: 48 additions & 0 deletions src/philox/philox_prng.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* philox_prng.h
* Header-Datei für die optimierte Philox PRNG Implementierung mit AVX2
* Autor: [Ihr Name]
* Datum: [Aktuelles Datum]
*
* Dieses Werk ist gemeinfrei. Es kann von jedermann für beliebige Zwecke
* genutzt werden, ohne jegliche Bedingungen, es sei denn, solche Bedingungen
* sind gesetzlich vorgeschrieben.
*/

#ifndef PHILOX_PRNG_H
#define PHILOX_PRNG_H

#include <stdint.h>
#include <immintrin.h> // Für AVX2-Intrinsics

#ifdef __cplusplus
extern "C" {
#endif

// Anzahl der Runden für den Philox 4x32 Algorithmus
#define NUM_ROUNDS 10 // Anzahl der Runden für Philox 4x32


typedef struct philox_state_s {
__m256i counter; // Verwende 256-Bit AVX2 Register für den Zähler
__m256i key; // Verwende 256-Bit AVX2 Register für den Schlüssel
} philox_state_t;

/* Initialisiert den Philox PRNG Zustand.
- state: Zeiger auf den Philox PRNG Zustandsstruktur.
- init_key: Array, das den Seed-Schlüssel enthält.
- key_length: Länge des Schlüsselarrays. */
void philox_prng_init(philox_state_t* state, unsigned long init_key[], unsigned long key_length);

/* Generiert Pseudorandom-Zahlen und schreibt sie in einen Puffer.
- state: Zeiger auf den initialisierten Philox PRNG Zustand.
- bufpos: Zielpuffer, in den die Pseudorandom-Zahlen geschrieben werden.
Der Puffer sollte mindestens 64 Bytes groß sein. */
void philox_prng_genrand_uint512_to_buf(philox_state_t* state, unsigned char* bufpos);

#ifdef __cplusplus
}
#endif

#endif // PHILOX_PRNG_H

47 changes: 47 additions & 0 deletions src/prng.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "isaac_rand/isaac64.h"
#include "alfg/add_lagg_fibonacci_prng.h" //Lagged Fibonacci generator prototype
#include "xor/xoroshiro256_prng.h" //XORoshiro-256 prototype
#include "philox/philox_prng.h" //Philox prototype

nwipe_prng_t nwipe_twister = { "Mersenne Twister (mt19937ar-cok)", nwipe_twister_init, nwipe_twister_read };

Expand All @@ -40,6 +41,9 @@ nwipe_prng_t nwipe_add_lagg_fibonacci_prng = { "Lagged Fibonacci generator",
/* XOROSHIRO-256 PRNG Structure */
nwipe_prng_t nwipe_xoroshiro256_prng = { "XORoshiro-256", nwipe_xoroshiro256_prng_init, nwipe_xoroshiro256_prng_read };

/* Philox PRNG Structure */
nwipe_prng_t nwipe_philox_prng = { "Philox", nwipe_philox_prng_init, nwipe_philox_prng_read };

/* Print given number of bytes from unsigned integer number to a byte stream buffer starting with low-endian. */
static inline void u32_to_buffer( u8* restrict buffer, u32 val, const int len )
{
Expand Down Expand Up @@ -340,3 +344,46 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE )

return 0; // Success
}

/* EXPERIMENTAL implementation of Philox algorithm to provide high-quality, but a lot of random numbers */
int nwipe_philox_prng_init( NWIPE_PRNG_INIT_SIGNATURE )
{
nwipe_log( NWIPE_LOG_NOTICE, "Initialising Philox PRNG" );

if( *state == NULL )
{
/* This is the first time that we have been called. */
*state = malloc( sizeof( philox_state_t ) );
}
philox_prng_init( (philox_state_t*) *state, (uint64_t*) ( seed->s ), seed->length / sizeof( uint64_t ) );

return 0;
}


int nwipe_philox_prng_read( NWIPE_PRNG_READ_SIGNATURE )
{
u8* restrict bufpos = buffer;
size_t words = count / SIZE_OF_PHILOX_PRNG;

/* Loop to fill the buffer with blocks directly from the XORoroshiro256 algorithm */
for( size_t ii = 0; ii < words; ++ii )
{
philox_prng_genrand_uint512_to_buf( (philox_state_t*) *state, bufpos );
bufpos += SIZE_OF_PHILOX_PRNG; // Move to the next block
}

/* Handle remaining bytes if count is not a multiple of SIZE_OF_PHILOX_PRNG */
const size_t remain = count % SIZE_OF_PHILOX_PRNG;
if( remain > 0 )
{
unsigned char temp_output[SIZE_OF_PHILOX_PRNG]; // Temporary buffer for the last block
philox_prng_genrand_uint512_to_buf( (philox_state_t*) *state, temp_output );

// Copy the remaining bytes
memcpy( bufpos, temp_output, remain );
}

return 0; // Success
}

8 changes: 8 additions & 0 deletions src/prng.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ int nwipe_add_lagg_fibonacci_prng_read( NWIPE_PRNG_READ_SIGNATURE );
int nwipe_xoroshiro256_prng_init( NWIPE_PRNG_INIT_SIGNATURE );
int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE );

/* Philox prototypes. */
int nwipe_philox_prng_init( NWIPE_PRNG_INIT_SIGNATURE );
int nwipe_philox_prng_read( NWIPE_PRNG_READ_SIGNATURE );

/* Size of the twister is not derived from the architecture, but it is strictly 4 bytes */
#define SIZE_OF_TWISTER 4

Expand All @@ -76,4 +80,8 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE );
/* Size of the XOROSHIRO-256 is not derived from the architecture, but it is strictly 32 bytes */
#define SIZE_OF_XOROSHIRO256_PRNG 32

/* Size of the Philox is not derived from the architecture, but it is strictly 64 bytes */
#define SIZE_OF_PHILOX_PRNG 64


#endif /* PRNG_H_ */

0 comments on commit 31e6ea4

Please sign in to comment.