»Core Development>Code coverage>Modules/_sha3/keccak/KeccakF-1600-opt64.c

Python code coverage for Modules/_sha3/keccak/KeccakF-1600-opt64.c

# | count | content
1n/a/*
2n/aThe Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3n/aMichaël Peeters and Gilles Van Assche. For more information, feedback or
4n/aquestions, please refer to our website: http://keccak.noekeon.org/
5n/a
6n/aImplementation by the designers,
7n/ahereby denoted as "the implementer".
8n/a
9n/aTo the extent possible under law, the implementer has waived all copyright
10n/aand related or neighboring rights to the source code in this file.
11n/ahttp://creativecommons.org/publicdomain/zero/1.0/
12n/a*/
13n/a
14n/a#include <string.h>
15n/a/* #include "brg_endian.h" */
16n/a#include "KeccakF-1600-opt64-settings.h"
17n/a#include "KeccakF-1600-interface.h"
18n/a
/* Byte type used by the byte <-> lane conversion helpers in this file. */
typedef unsigned char UINT8;
/* typedef unsigned long long int UINT64; */
/* NOTE(review): UINT64 is used throughout this file but its typedef is
 * commented out here; presumably an included header supplies it -- confirm. */

/*
 * ALIGN: declaration prefix requesting 32-byte alignment, spelled in the
 * syntax of the compiler in use.  Expands to nothing on other compilers.
 */
#ifdef __GNUC__
#  define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#  define ALIGN __declspec(align(32))
#else
#  define ALIGN
#endif
29n/a
30n/a#if defined(UseSSE)
31n/a #include <x86intrin.h>
32n/a typedef __m128i V64;
33n/a typedef __m128i V128;
34n/a typedef union {
35n/a V128 v128;
36n/a UINT64 v64[2];
37n/a } V6464;
38n/a
39n/a #define ANDnu64(a, b) _mm_andnot_si128(a, b)
40n/a #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
41n/a #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
42n/a #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
43n/a #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
44n/a #define XOR64(a, b) _mm_xor_si128(a, b)
45n/a #define XOReq64(a, b) a = _mm_xor_si128(a, b)
46n/a #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
47n/a
48n/a #define ANDnu128(a, b) _mm_andnot_si128(a, b)
49n/a #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
50n/a #define CONST128(a) _mm_load_si128((const V128 *)&(a))
51n/a #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
52n/a #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
53n/a #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
54n/a #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
55n/a #define XOR128(a, b) _mm_xor_si128(a, b)
56n/a #define XOReq128(a, b) a = _mm_xor_si128(a, b)
57n/a #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
58n/a #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
59n/a #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
60n/a #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
61n/a #define ZERO128() _mm_setzero_si128()
62n/a
63n/a #ifdef UseOnlySIMD64
64n/a #include "KeccakF-1600-simd64.macros"
65n/a #else
66n/aALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
67n/a #include "KeccakF-1600-simd128.macros"
68n/a #endif
69n/a
70n/a #ifdef UseBebigokimisa
71n/a #error "UseBebigokimisa cannot be used in combination with UseSSE"
72n/a #endif
73n/a#elif defined(UseXOP)
74n/a #include <x86intrin.h>
75n/a typedef __m128i V64;
76n/a typedef __m128i V128;
77n/a
78n/a #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
79n/a #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
80n/a #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
81n/a #define XOR64(a, b) _mm_xor_si128(a, b)
82n/a #define XOReq64(a, b) a = _mm_xor_si128(a, b)
83n/a
84n/a #define ANDnu128(a, b) _mm_andnot_si128(a, b)
85n/a #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
86n/a #define CONST128(a) _mm_load_si128((const V128 *)&(a))
87n/a #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
88n/a #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
89n/a #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
90n/a #define XOR128(a, b) _mm_xor_si128(a, b)
91n/a #define XOReq128(a, b) a = _mm_xor_si128(a, b)
92n/a #define ZERO128() _mm_setzero_si128()
93n/a
94n/a #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
95n/a #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
96n/a #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
97n/a #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
98n/a #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
99n/a #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
100n/a #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
101n/a
102n/a #define ROL6464same(a, o) _mm_roti_epi64(a, o)
103n/a #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
104n/aALIGN const UINT64 rot_0_20[2] = { 0, 20};
105n/aALIGN const UINT64 rot_44_3[2] = {44, 3};
106n/aALIGN const UINT64 rot_43_45[2] = {43, 45};
107n/aALIGN const UINT64 rot_21_61[2] = {21, 61};
108n/aALIGN const UINT64 rot_14_28[2] = {14, 28};
109n/aALIGN const UINT64 rot_1_36[2] = { 1, 36};
110n/aALIGN const UINT64 rot_6_10[2] = { 6, 10};
111n/aALIGN const UINT64 rot_25_15[2] = {25, 15};
112n/aALIGN const UINT64 rot_8_56[2] = { 8, 56};
113n/aALIGN const UINT64 rot_18_27[2] = {18, 27};
114n/aALIGN const UINT64 rot_62_55[2] = {62, 55};
115n/aALIGN const UINT64 rot_39_41[2] = {39, 41};
116n/a
#if defined(UseSimulatedXOP)
    /* For debugging purposes, when XOP is not available */
    #undef ROL6464
    #undef ROL6464same
    /* Same rotation count for both 64-bit halves, built from two shifts. */
    #define ROL6464same(a, o)   _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))

    /*
     * Emulates ROL6464 without XOP: returns a vector whose low half is the
     * low half of `a` rotated left by r0 and whose high half is the high half
     * of `a` rotated left by r1.
     *
     * The rotations go through ROL6464same (defined just above); no ROL64
     * macro is defined in the XOP configuration.
     */
    V128 ROL6464(V128 a, int r0, int r1)
    {
        V128 a0 = ROL6464same(a, r0);                 /* low half is the wanted one  */
        V128 a1 = COPY64HI2LO(ROL6464same(a, r1));    /* wanted high half moved low  */
        return GET64LOLO(a0, a1);                     /* { a0.low, a1.low }          */
    }
#endif
129n/a
130n/a #include "KeccakF-1600-xop.macros"
131n/a
132n/a #ifdef UseBebigokimisa
133n/a #error "UseBebigokimisa cannot be used in combination with UseXOP"
134n/a #endif
135n/a#elif defined(UseMMX)
136n/a #include <mmintrin.h>
137n/a typedef __m64 V64;
138n/a #define ANDnu64(a, b) _mm_andnot_si64(a, b)
139n/a
140n/a #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
141n/a #define LOAD64(a) *(V64*)&(a)
142n/a #define CONST64(a) *(V64*)&(a)
143n/a #define STORE64(a, b) *(V64*)&(a) = b
144n/a #else
145n/a #define LOAD64(a) (V64)a
146n/a #define CONST64(a) (V64)a
147n/a #define STORE64(a, b) a = (UINT64)b
148n/a #endif
149n/a #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
150n/a #define XOR64(a, b) _mm_xor_si64(a, b)
151n/a #define XOReq64(a, b) a = _mm_xor_si64(a, b)
152n/a
153n/a #include "KeccakF-1600-simd64.macros"
154n/a
155n/a #ifdef UseBebigokimisa
156n/a #error "UseBebigokimisa cannot be used in combination with UseMMX"
157n/a #endif
158n/a#else
159n/a #if defined(_MSC_VER)
160n/a #define ROL64(a, offset) _rotl64(a, offset)
161n/a #elif defined(UseSHLD)
162n/a #define ROL64(x,N) ({ \
163n/a register UINT64 __out; \
164n/a register UINT64 __in = x; \
165n/a __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
166n/a __out; \
167n/a })
168n/a #else
169n/a #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
170n/a #endif
171n/a
172n/a #include "KeccakF-1600-64.macros"
173n/a#endif
174n/a
175n/a#include "KeccakF-1600-unrolling.macros"
176n/a
177n/astatic void KeccakPermutationOnWords(UINT64 *state)
178n/a{
179n/a declareABCDE
180n/a#if (Unrolling != 24)
181n/a unsigned int i;
182n/a#endif
183n/a
184n/a copyFromState(A, state)
185n/a rounds
186n/a#if defined(UseMMX)
187n/a _mm_empty();
188n/a#endif
189n/a}
190n/a
191n/astatic void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
192n/a{
193n/a declareABCDE
194n/a#if (Unrolling != 24)
195n/a unsigned int i;
196n/a#endif
197n/a unsigned int j;
198n/a
199n/a for(j=0; j<laneCount; j++)
200n/a state[j] ^= input[j];
201n/a copyFromState(A, state)
202n/a rounds
203n/a#if defined(UseMMX)
204n/a _mm_empty();
205n/a#endif
206n/a}
207n/a
#if defined(ProvideFast576)
/*
 * Fixed-size variant for 576 bits (9 lanes) of input: the working variables
 * are set up by copyFromStateAndXor576bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
223n/a
#if defined(ProvideFast832)
/*
 * Fixed-size variant for 832 bits (13 lanes) of input: the working variables
 * are set up by copyFromStateAndXor832bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
239n/a
#if defined(ProvideFast1024)
/*
 * Fixed-size variant for 1024 bits (16 lanes) of input: the working variables
 * are set up by copyFromStateAndXor1024bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
255n/a
#if defined(ProvideFast1088)
/*
 * Fixed-size variant for 1088 bits (17 lanes) of input: the working variables
 * are set up by copyFromStateAndXor1088bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
271n/a
#if defined(ProvideFast1152)
/*
 * Fixed-size variant for 1152 bits (18 lanes) of input: the working variables
 * are set up by copyFromStateAndXor1152bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
287n/a
#if defined(ProvideFast1344)
/*
 * Fixed-size variant for 1344 bits (21 lanes) of input: the working variables
 * are set up by copyFromStateAndXor1344bits(A, state, input) -- presumably
 * loading `state` with `input` XORed in -- and then `rounds` is run.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* Presumably the round counter used inside `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();    /* leave MMX state before returning */
#endif
}
#endif
303n/a
/*
 * One-time initialisation hook.  This implementation needs no global setup,
 * so the body is intentionally empty.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and calls with arguments are rejected at compile time.
 */
static void KeccakInitialize(void)
{
}
307n/a
/*
 * Resets a 200-byte state buffer (25 lanes of 8 bytes) to all zero bits.
 *
 * When UseBebigokimisa is defined, lanes 1, 2, 8, 12, 17 and 20 are then set
 * to all ones instead; `state` is accessed as UINT64 lanes in that case.
 */
static void KeccakInitializeState(unsigned char *state)
{
    memset(state, 0, 25 * 8);
#ifdef UseBebigokimisa
    {
        /* Indices of the lanes that start out as all-ones. */
        static const unsigned char onesLanes[] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *lanes = (UINT64*)state;
        unsigned int k;

        for (k = 0; k < sizeof onesLanes / sizeof onesLanes[0]; k++)
            lanes[onesLanes[k]] = ~(UINT64)0;
    }
#endif
}
320n/a
321n/astatic void KeccakPermutation(unsigned char *state)
322n/a{
323n/a /* We assume the state is always stored as words */
324n/a KeccakPermutationOnWords((UINT64*)state);
325n/a}
326n/a
327n/a#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
328n/astatic void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
329n/a{
330n/a unsigned int i;
331n/a
332n/a *word = 0;
333n/a for(i=0; i<(64/8); i++)
334n/a *word |= (UINT64)(bytes[i]) << (8*i);
335n/a}
336n/a#endif
337n/a
338n/a
#if defined(ProvideFast576)
/*
 * Absorbs 576 bits (72 bytes, 9 lanes) from `data` into `state` and runs the
 * permutation.
 */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[9];
    unsigned int k;

    for (k = 0; k < 9; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, lanes);
#endif
}
#endif
354n/a
#if defined(ProvideFast832)
/*
 * Absorbs 832 bits (104 bytes, 13 lanes) from `data` into `state` and runs
 * the permutation.
 */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[13];
    unsigned int k;

    for (k = 0; k < 13; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, lanes);
#endif
}
#endif
370n/a
#if defined(ProvideFast1024)
/*
 * Absorbs 1024 bits (128 bytes, 16 lanes) from `data` into `state` and runs
 * the permutation.
 */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[16];
    unsigned int k;

    for (k = 0; k < 16; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, lanes);
#endif
}
#endif
386n/a
#if defined(ProvideFast1088)
/*
 * Absorbs 1088 bits (136 bytes, 17 lanes) from `data` into `state` and runs
 * the permutation.
 */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[17];
    unsigned int k;

    for (k = 0; k < 17; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, lanes);
#endif
}
#endif
402n/a
#if defined(ProvideFast1152)
/*
 * Absorbs 1152 bits (144 bytes, 18 lanes) from `data` into `state` and runs
 * the permutation.
 */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[18];
    unsigned int k;

    for (k = 0; k < 18; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, lanes);
#endif
}
#endif
418n/a
#if defined(ProvideFast1344)
/*
 * Absorbs 1344 bits (168 bytes, 21 lanes) from `data` into `state` and runs
 * the permutation.
 */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    /* The bytes are used in place as lanes.
     * NOTE(review): assumes `data` is suitably aligned for UINT64 access -- confirm with callers. */
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
#else
    /* Otherwise build each lane from its 8 bytes first. */
    UINT64 lanes[21];
    unsigned int k;

    for (k = 0; k < 21; k++)
        fromBytesToWord(&lanes[k], &data[k*8]);
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, lanes);
#endif
}
#endif
434n/a
435n/astatic void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
436n/a{
437n/a#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
438n/a KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
439n/a#else
440n/a UINT64 dataAsWords[25];
441n/a unsigned int i;
442n/a
443n/a for(i=0; i<laneCount; i++)
444n/a fromBytesToWord(dataAsWords+i, data+(i*8));
445n/a KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
446n/a#endif
447n/a}
448n/a
449n/a#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
450n/astatic void fromWordToBytes(UINT8 *bytes, const UINT64 word)
451n/a{
452n/a unsigned int i;
453n/a
454n/a for(i=0; i<(64/8); i++)
455n/a bytes[i] = (word >> (8*i)) & 0xFF;
456n/a}
457n/a#endif
458n/a
459n/a
#ifdef ProvideFast1024
/*
 * Copies the first 1024 bits (128 bytes, 16 lanes) of `state` to `data`.
 *
 * state: state buffer; read as UINT64 lanes on the non-little-endian path.
 * data:  receives 128 bytes; no particular alignment is required.
 *
 * When UseBebigokimisa is defined, the output lanes 1, 2, 8 and 12 are
 * bitwise complemented after the copy.  These are the lanes among the first
 * 16 that KeccakInitializeState() sets to all-ones in that configuration.
 * The complement is done byte by byte, so `data` is never accessed through a
 * UINT64 pointer.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned char complementedLanes[] = { 1, 2, 8, 12 };
        unsigned int k;

        for (k = 0; k < sizeof complementedLanes / sizeof complementedLanes[0]; k++) {
            unsigned char *lane = data + 8u * complementedLanes[k];
            unsigned int b;

            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
#endif
479n/a
/*
 * Copies the first `laneCount` lanes (laneCount * 8 bytes) of `state` to
 * `data`.
 *
 * state:     state buffer; read as UINT64 lanes on the non-little-endian path.
 * data:      receives laneCount * 8 bytes; no particular alignment is required.
 * laneCount: number of lanes to extract.
 *
 * When UseBebigokimisa is defined, those of the output lanes 1, 2, 8, 12, 17
 * and 20 that lie below `laneCount` are bitwise complemented after the copy.
 * These are the lanes KeccakInitializeState() sets to all-ones in that
 * configuration.  The complement is done byte by byte, so `data` is never
 * accessed through a UINT64 pointer.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order lets the loop stop at the first lane that was not extracted. */
        static const unsigned char complementedLanes[] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k;

        for (k = 0; k < sizeof complementedLanes / sizeof complementedLanes[0]; k++) {
            unsigned char *lane;
            unsigned int b;

            if (complementedLanes[k] >= laneCount)
                break;
            lane = data + 8u * complementedLanes[k];
            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}