» Core Development > Code coverage > Modules/_blake2/impl/blake2b.c

Python code coverage for Modules/_blake2/impl/blake2b.c

# | count | content
1n/a/*
2n/a BLAKE2 reference source code package - optimized C implementations
3n/a
4n/a Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5n/a terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6n/a your option. The terms of these licenses can be found at:
7n/a
8n/a - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9n/a - OpenSSL license : https://www.openssl.org/source/license.html
10n/a - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11n/a
12n/a More information about the BLAKE2 hash function can be found at
13n/a https://blake2.net.
14n/a*/
15n/a
16n/a#include <stdint.h>
17n/a#include <string.h>
18n/a#include <stdio.h>
19n/a
20n/a#include "blake2.h"
21n/a#include "blake2-impl.h"
22n/a
23n/a#include "blake2-config.h"
24n/a
25n/a#ifdef _MSC_VER
26n/a#include <intrin.h> /* for _mm_set_epi64x */
27n/a#endif
28n/a#include <emmintrin.h>
29n/a#if defined(HAVE_SSSE3)
30n/a#include <tmmintrin.h>
31n/a#endif
32n/a#if defined(HAVE_SSE41)
33n/a#include <smmintrin.h>
34n/a#endif
35n/a#if defined(HAVE_AVX)
36n/a#include <immintrin.h>
37n/a#endif
38n/a#if defined(HAVE_XOP)
39n/a#include <x86intrin.h>
40n/a#endif
41n/a
42n/a#include "blake2b-round.h"
43n/a
/*
 * BLAKE2b initialization vector: eight 64-bit constants.
 * In this file they seed the chaining value h[] and are loaded into the
 * lower half of the compression working state.
 */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL,
  0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL,
  0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL,
  0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL,
  0x5be0cd19137e2179ULL
};
51n/a
/*
 * Message-word permutation table: one row of 16 indices per round, 12 rows.
 * Rows 10 and 11 repeat rows 0 and 1.
 */
static const uint8_t blake2b_sigma[12][16] =
{
  /* row  0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* row  1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /* row  2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /* row  3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /* row  4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /* row  5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /* row  6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /* row  7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /* row  8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /* row  9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  /* row 10 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* row 11 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
67n/a
68n/a
69n/a/* Some helper functions, not necessarily useful */
70n/aBLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S )
71n/a{
72n/a S->f[1] = -1;
73n/a return 0;
74n/a}
75n/a
76n/aBLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S )
77n/a{
78n/a S->f[1] = 0;
79n/a return 0;
80n/a}
81n/a
82n/aBLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S )
83n/a{
84n/a return S->f[0] != 0;
85n/a}
86n/a
87n/aBLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S )
88n/a{
89n/a if( S->last_node ) blake2b_set_lastnode( S );
90n/a
91n/a S->f[0] = -1;
92n/a return 0;
93n/a}
94n/a
95n/aBLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S )
96n/a{
97n/a if( S->last_node ) blake2b_clear_lastnode( S );
98n/a
99n/a S->f[0] = 0;
100n/a return 0;
101n/a}
102n/a
103n/a
104n/aBLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
105n/a{
106n/a#if __x86_64__
107n/a /* ADD/ADC chain */
108n/a __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
109n/a t += inc;
110n/a S->t[0] = ( uint64_t )( t >> 0 );
111n/a S->t[1] = ( uint64_t )( t >> 64 );
112n/a#else
113n/a S->t[0] += inc;
114n/a S->t[1] += ( S->t[0] < inc );
115n/a#endif
116n/a return 0;
117n/a}
118n/a
119n/a
120n/a/* Parameter-related functions */
121n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
122n/a{
123n/a P->digest_length = digest_length;
124n/a return 0;
125n/a}
126n/a
127n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
128n/a{
129n/a P->fanout = fanout;
130n/a return 0;
131n/a}
132n/a
133n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
134n/a{
135n/a P->depth = depth;
136n/a return 0;
137n/a}
138n/a
139n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
140n/a{
141n/a P->leaf_length = leaf_length;
142n/a return 0;
143n/a}
144n/a
145n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
146n/a{
147n/a P->node_offset = node_offset;
148n/a return 0;
149n/a}
150n/a
151n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
152n/a{
153n/a P->node_depth = node_depth;
154n/a return 0;
155n/a}
156n/a
157n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
158n/a{
159n/a P->inner_length = inner_length;
160n/a return 0;
161n/a}
162n/a
163n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
164n/a{
165n/a memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
166n/a return 0;
167n/a}
168n/a
169n/aBLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
170n/a{
171n/a memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
172n/a return 0;
173n/a}
174n/a
175n/aBLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S )
176n/a{
177n/a int i;
178n/a memset( S, 0, sizeof( blake2b_state ) );
179n/a
180n/a for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
181n/a
182n/a return 0;
183n/a}
184n/a
185n/a/* init xors IV with input parameter block */
186n/aint blake2b_init_param( blake2b_state *S, const blake2b_param *P )
187n/a{
188n/a /*blake2b_init0( S ); */
189n/a const uint8_t * v = ( const uint8_t * )( blake2b_IV );
190n/a const uint8_t * p = ( const uint8_t * )( P );
191n/a uint8_t * h = ( uint8_t * )( S->h );
192n/a int i;
193n/a /* IV XOR ParamBlock */
194n/a memset( S, 0, sizeof( blake2b_state ) );
195n/a
196n/a for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
197n/a
198n/a return 0;
199n/a}
200n/a
201n/a
202n/a/* Some sort of default parameter block initialization, for sequential blake2b */
203n/aint blake2b_init( blake2b_state *S, const uint8_t outlen )
204n/a{
205n/a const blake2b_param P =
206n/a {
207n/a outlen,
208n/a 0,
209n/a 1,
210n/a 1,
211n/a 0,
212n/a 0,
213n/a 0,
214n/a 0,
215n/a {0},
216n/a {0},
217n/a {0}
218n/a };
219n/a
220n/a if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
221n/a
222n/a return blake2b_init_param( S, &P );
223n/a}
224n/a
225n/aint blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
226n/a{
227n/a const blake2b_param P =
228n/a {
229n/a outlen,
230n/a keylen,
231n/a 1,
232n/a 1,
233n/a 0,
234n/a 0,
235n/a 0,
236n/a 0,
237n/a {0},
238n/a {0},
239n/a {0}
240n/a };
241n/a
242n/a if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
243n/a
244n/a if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
245n/a
246n/a if( blake2b_init_param( S, &P ) < 0 )
247n/a return 0;
248n/a
249n/a {
250n/a uint8_t block[BLAKE2B_BLOCKBYTES];
251n/a memset( block, 0, BLAKE2B_BLOCKBYTES );
252n/a memcpy( block, key, keylen );
253n/a blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
254n/a secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
255n/a }
256n/a return 0;
257n/a}
258n/a
259n/aBLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
260n/a{
261n/a __m128i row1l, row1h;
262n/a __m128i row2l, row2h;
263n/a __m128i row3l, row3h;
264n/a __m128i row4l, row4h;
265n/a __m128i b0, b1;
266n/a __m128i t0, t1;
267n/a#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
268n/a const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
269n/a const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
270n/a#endif
271n/a#if defined(HAVE_SSE41)
272n/a const __m128i m0 = LOADU( block + 00 );
273n/a const __m128i m1 = LOADU( block + 16 );
274n/a const __m128i m2 = LOADU( block + 32 );
275n/a const __m128i m3 = LOADU( block + 48 );
276n/a const __m128i m4 = LOADU( block + 64 );
277n/a const __m128i m5 = LOADU( block + 80 );
278n/a const __m128i m6 = LOADU( block + 96 );
279n/a const __m128i m7 = LOADU( block + 112 );
280n/a#else
281n/a const uint64_t m0 = ( ( uint64_t * )block )[ 0];
282n/a const uint64_t m1 = ( ( uint64_t * )block )[ 1];
283n/a const uint64_t m2 = ( ( uint64_t * )block )[ 2];
284n/a const uint64_t m3 = ( ( uint64_t * )block )[ 3];
285n/a const uint64_t m4 = ( ( uint64_t * )block )[ 4];
286n/a const uint64_t m5 = ( ( uint64_t * )block )[ 5];
287n/a const uint64_t m6 = ( ( uint64_t * )block )[ 6];
288n/a const uint64_t m7 = ( ( uint64_t * )block )[ 7];
289n/a const uint64_t m8 = ( ( uint64_t * )block )[ 8];
290n/a const uint64_t m9 = ( ( uint64_t * )block )[ 9];
291n/a const uint64_t m10 = ( ( uint64_t * )block )[10];
292n/a const uint64_t m11 = ( ( uint64_t * )block )[11];
293n/a const uint64_t m12 = ( ( uint64_t * )block )[12];
294n/a const uint64_t m13 = ( ( uint64_t * )block )[13];
295n/a const uint64_t m14 = ( ( uint64_t * )block )[14];
296n/a const uint64_t m15 = ( ( uint64_t * )block )[15];
297n/a#endif
298n/a row1l = LOADU( &S->h[0] );
299n/a row1h = LOADU( &S->h[2] );
300n/a row2l = LOADU( &S->h[4] );
301n/a row2h = LOADU( &S->h[6] );
302n/a row3l = LOADU( &blake2b_IV[0] );
303n/a row3h = LOADU( &blake2b_IV[2] );
304n/a row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
305n/a row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
306n/a ROUND( 0 );
307n/a ROUND( 1 );
308n/a ROUND( 2 );
309n/a ROUND( 3 );
310n/a ROUND( 4 );
311n/a ROUND( 5 );
312n/a ROUND( 6 );
313n/a ROUND( 7 );
314n/a ROUND( 8 );
315n/a ROUND( 9 );
316n/a ROUND( 10 );
317n/a ROUND( 11 );
318n/a row1l = _mm_xor_si128( row3l, row1l );
319n/a row1h = _mm_xor_si128( row3h, row1h );
320n/a STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
321n/a STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
322n/a row2l = _mm_xor_si128( row4l, row2l );
323n/a row2h = _mm_xor_si128( row4h, row2h );
324n/a STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
325n/a STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
326n/a return 0;
327n/a}
328n/a
329n/a
330n/aint blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen )
331n/a{
332n/a while( inlen > 0 )
333n/a {
334n/a size_t left = S->buflen;
335n/a size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
336n/a
337n/a if( inlen > fill )
338n/a {
339n/a memcpy( S->buf + left, in, fill ); /* Fill buffer */
340n/a S->buflen += fill;
341n/a blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
342n/a blake2b_compress( S, S->buf ); /* Compress */
343n/a memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */
344n/a S->buflen -= BLAKE2B_BLOCKBYTES;
345n/a in += fill;
346n/a inlen -= fill;
347n/a }
348n/a else /* inlen <= fill */
349n/a {
350n/a memcpy( S->buf + left, in, inlen );
351n/a S->buflen += inlen; /* Be lazy, do not compress */
352n/a in += inlen;
353n/a inlen -= inlen;
354n/a }
355n/a }
356n/a
357n/a return 0;
358n/a}
359n/a
360n/a
361n/aint blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen )
362n/a{
363n/a if( outlen > BLAKE2B_OUTBYTES )
364n/a return -1;
365n/a
366n/a if( blake2b_is_lastblock( S ) )
367n/a return -1;
368n/a
369n/a if( S->buflen > BLAKE2B_BLOCKBYTES )
370n/a {
371n/a blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
372n/a blake2b_compress( S, S->buf );
373n/a S->buflen -= BLAKE2B_BLOCKBYTES;
374n/a memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
375n/a }
376n/a
377n/a blake2b_increment_counter( S, S->buflen );
378n/a blake2b_set_lastblock( S );
379n/a memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
380n/a blake2b_compress( S, S->buf );
381n/a memcpy( out, &S->h[0], outlen );
382n/a return 0;
383n/a}
384n/a
385n/a
386n/aint blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
387n/a{
388n/a blake2b_state S[1];
389n/a
390n/a /* Verify parameters */
391n/a if ( NULL == in && inlen > 0 ) return -1;
392n/a
393n/a if ( NULL == out ) return -1;
394n/a
395n/a if( NULL == key && keylen > 0 ) return -1;
396n/a
397n/a if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
398n/a
399n/a if( keylen > BLAKE2B_KEYBYTES ) return -1;
400n/a
401n/a if( keylen )
402n/a {
403n/a if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
404n/a }
405n/a else
406n/a {
407n/a if( blake2b_init( S, outlen ) < 0 ) return -1;
408n/a }
409n/a
410n/a blake2b_update( S, ( const uint8_t * )in, inlen );
411n/a blake2b_final( S, out, outlen );
412n/a return 0;
413n/a}
414n/a
#if defined(SUPERCOP)
/*
 * SUPERCOP entry point: unkeyed BLAKE2b with a full BLAKE2B_OUTBYTES-byte
 * digest. Returns whatever blake2b() returns.
 */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  const int status = blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );

  return status;
}
#endif
421n/a
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
 * Self-test: hash every prefix of the byte sequence 0, 1, 2, ... with the
 * key 0, 1, 2, ... and compare each digest against the keyed known-answer
 * table. Prints "ok" and returns 0 when all match; prints "error" and
 * returns -1 at the first mismatch.
 */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[KAT_LENGTH];
  size_t len;

  for( len = 0; len < BLAKE2B_KEYBYTES; ++len )
  {
    key[len] = ( uint8_t )len;
  }

  for( len = 0; len < KAT_LENGTH; ++len )
  {
    buf[len] = ( uint8_t )len;
  }

  /* One known-answer vector per input length. */
  for( len = 0; len < KAT_LENGTH; ++len )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];

    blake2b( hash, buf, key, BLAKE2B_OUTBYTES, len, BLAKE2B_KEYBYTES );

    if( memcmp( hash, blake2b_keyed_kat[len], BLAKE2B_OUTBYTES ) != 0 )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif
453n/a