Fast Cheap Unreviewed Stream Cipher
I received this code from another friend. Here's a FAST somewhat cryptographically secure pseudo-random generator written by a friend of mine that wishes to remain anonymous. He has given permission for the following code to be placed in the public domain, and to be sent to cypherpunks and others for review. Alas, I have never gotten around to doing so. As it states, on a PowerPC it'll produce bits fast enough to be a pseudo-one-time pad / stream cipher for good digitized video on a LAN (the purpose for which he originally sent it to me) and hence should do point-to-point audio without even breathing hard. For those who haven't heard, the scenario is for point-to-point audio to use an affordable (weak) stream cipher, but to be changing keys frequently using our strongly secure SSL channel. Each costly break would only compromise a few seconds or so of audio. The following code may serve as the affordable (but maybe better than weak) stream cipher. /******************************************************/ /** (c) 1823 Millard Fillmore, all rights expired **/ /** Full words of pseudorandom bits at video speed **/ /******************************************************/ #include <stdlib.h> #include <stdio.h> #include <time.h> #include <sioux.h> #define WORD_SIZE 32 unsigned long shiftRand(); static void refillShiftRand(); void seedShiftRand(unsigned long seed); static unsigned long shiftRandArray[31] = { }; // to hold internal state values static unsigned long * arrayPtr = shiftRandArray; // incremented as state values are consumed static unsigned long * const end = shiftRandArray + 31; // marks end (and need to refill array) // Initializes shiftRandArray to pseudorandom contents using a seed value and the system rand function, // which is assumed to produce 16 bits per call. Low rand bits are obscured by xoring with high bits from other // calls, and all product bits depend on two or more calls to rand. The resulting values are themselves obscured, // not simply revealed at the output, hence this is probably overkill. The seed acts as a key. void seedShiftRand(unsigned long seed) { srand(seed); long cyclesPerWord = ((WORD_SIZE + 7) >> 3) - 1; // will add a net of 8 bits per call to rand unsigned long * arrPtr = shiftRandArray; for (; arrPtr < end; arrPtr++) { unsigned long rand1 = rand(); // wrap this value around word boundary: unsigned long element = (rand1 >> 8) | (rand1 << (WORD_SIZE - 8)); for (long i = 0; i < cyclesPerWord; i++) { element ^= rand() << (8 * i); //previous 8 high bits obscure 8 new low bits } *arrPtr = element; } } // Low-order bits in the following act as an xor-based linear feedback shift register (with moving taps // and stationary data); the repeat period equals (1 << 31) - 1. Higher-order bits have similar // "intrinsic" behavior, but the state of each is determined by additional xoring with a sequence // of carry bits from below, causing repeated period-doubling across the width of the word. // // The choice of 31 and 13 corresponds to a primitive polynomial mod 2; Applied Cryptography has // typos in polynomial coefficients, but the repeat period was tested directly. This code avoids a // shift-direction mistake in Applied Cryptography, and hence follows Numerical Recipes. // static void refillShiftRand() { unsigned long * tap0 = shiftRandArray + 0; unsigned long * tap13 = shiftRandArray + 13; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13 = shiftRandArray; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; tap0++; tap13++; *tap0 = *tap0 + *tap13; return; } // sum at 10 million iterations = 1344076334 /* // Slightly faster than the above on a 604, provided CodeWarrior's instruction-scheduling "optimization" is OFF. static void refillShiftRand() { unsigned long * tap0a = shiftRandArray + 0; unsigned long * tap0b = shiftRandArray + 1; // uses more registers unsigned long * tap13a = shiftRandArray + 13; unsigned long * tap13b = shiftRandArray + 14; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a = shiftRandArray; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b = (shiftRandArray + 1); *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; tap0b += 2; tap13b += 2; *tap0a = *tap0a + *tap13a; tap0a += 2; tap13a += 2; *tap0b = *tap0b + *tap13b; *tap0a = *tap0a + *tap13a; return; } // sum at 10 million iterations = 1344076334 */ // The array-filling mechanism above provides an information-conserving mechanism for producing // pseudorandom numbers with an effectively unlimited repeat period, fast, but based on a large number of // internal state bits, and having a bit of a given order eventually depend on all bits of the same or // lower order in all words in the array. // Rare, transient states having almost all zeros in the low bits will show strong correlations, // since the restoration of normal bit statistics will take several cycles. (The all-zero state // is inaccessible unless it is the starting condition -- one might test for this.) Correlations // in the frequency of low-order zero bits in sequentially generated array elements are eliminated // in the output by xoring high bits with the low bits; adding a preceding squaring operation // adds the effect of making any given output consistent with a large number of array element states, // making inferences regarding the internal state of the system relatively difficult. inline unsigned long shiftRand() { ((arrayPtr == end) ? // '?:' instead of 'if' to enable inlining by CW compiler (refillShiftRand(), arrayPtr = shiftRandArray, 0) : 0); unsigned long output = *arrayPtr++; unsigned long square = output * output; // this multiplication seems free on a 605 return(square + (output >> (WORD_SIZE / 2))); } // With multiplication (above) and summing of output to prevent bogus optimizations, 16.5 clocks per word. // Test code for CodeWarrier environment. void main () { SIOUXSettings.asktosaveonclose = FALSE; unsigned long sum = 0; seedShiftRand(12345); // easily guessed seed long start = clock(); for (long i = 0; i < 2000000; i++) { sum += shiftRand(); sum += shiftRand(); sum += shiftRand(); sum += shiftRand(); sum += shiftRand(); } unsigned long end = clock(); printf("\n sum = %u", sum); printf("\n\n time = %d \n", (int)(1000.0 * ((float)(end - start) / (float)CLOCKS_PER_SEC))); }
participants (1)
-
Bill Frantz