» Core Development > Code coverage > Modules/_blake2/impl/blake2s.c

Python code coverage for Modules/_blake2/impl/blake2s.c

# | count | content
1n/a/*
2n/a BLAKE2 reference source code package - optimized C implementations
3n/a
4n/a Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5n/a terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6n/a your option. The terms of these licenses can be found at:
7n/a
8n/a - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9n/a - OpenSSL license : https://www.openssl.org/source/license.html
10n/a - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11n/a
12n/a More information about the BLAKE2 hash function can be found at
13n/a https://blake2.net.
14n/a*/
15n/a
16n/a#include <stdint.h>
17n/a#include <string.h>
18n/a#include <stdio.h>
19n/a
20n/a#include "blake2.h"
21n/a#include "blake2-impl.h"
22n/a
23n/a#include "blake2-config.h"
24n/a
25n/a
26n/a#include <emmintrin.h>
27n/a#if defined(HAVE_SSSE3)
28n/a#include <tmmintrin.h>
29n/a#endif
30n/a#if defined(HAVE_SSE41)
31n/a#include <smmintrin.h>
32n/a#endif
33n/a#if defined(HAVE_AVX)
34n/a#include <immintrin.h>
35n/a#endif
36n/a#if defined(HAVE_XOP)
37n/a#include <x86intrin.h>
38n/a#endif
39n/a
40n/a#include "blake2s-round.h"
41n/a
/*
 * Eight 32-bit initialization constants.
 * blake2s_init0 copies them into S->h; blake2s_init_param XORs their bytes
 * with the parameter block.
 */
static const uint32_t blake2s_IV[8] = {
    0x6A09E667UL,
    0xBB67AE85UL,
    0x3C6EF372UL,
    0xA54FF53AUL,
    0x510E527FUL,
    0x9B05688CUL,
    0x1F83D9ABUL,
    0x5BE0CD19UL
};
47n/a
/*
 * Ten permutations of the indices 0..15, one per row.
 * NOTE(review): not referenced by any code visible in this file; presumably
 * the per-round message word schedule -- confirm against blake2s-round.h.
 */
static const uint8_t blake2s_sigma[10][16] = {
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 }
};
61n/a
62n/a
63n/a/* Some helper functions, not necessarily useful */
64n/aBLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S )
65n/a{
66n/a S->f[1] = -1;
67n/a return 0;
68n/a}
69n/a
70n/aBLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S )
71n/a{
72n/a S->f[1] = 0;
73n/a return 0;
74n/a}
75n/a
76n/aBLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S )
77n/a{
78n/a return S->f[0] != 0;
79n/a}
80n/a
81n/aBLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S )
82n/a{
83n/a if( S->last_node ) blake2s_set_lastnode( S );
84n/a
85n/a S->f[0] = -1;
86n/a return 0;
87n/a}
88n/a
89n/aBLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S )
90n/a{
91n/a if( S->last_node ) blake2s_clear_lastnode( S );
92n/a
93n/a S->f[0] = 0;
94n/a return 0;
95n/a}
96n/a
97n/aBLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
98n/a{
99n/a uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
100n/a t += inc;
101n/a S->t[0] = ( uint32_t )( t >> 0 );
102n/a S->t[1] = ( uint32_t )( t >> 32 );
103n/a return 0;
104n/a}
105n/a
106n/a
107n/a/* Parameter-related functions */
108n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
109n/a{
110n/a P->digest_length = digest_length;
111n/a return 0;
112n/a}
113n/a
114n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
115n/a{
116n/a P->fanout = fanout;
117n/a return 0;
118n/a}
119n/a
120n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
121n/a{
122n/a P->depth = depth;
123n/a return 0;
124n/a}
125n/a
126n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
127n/a{
128n/a P->leaf_length = leaf_length;
129n/a return 0;
130n/a}
131n/a
132n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
133n/a{
134n/a store48( P->node_offset, node_offset );
135n/a return 0;
136n/a}
137n/a
138n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
139n/a{
140n/a P->node_depth = node_depth;
141n/a return 0;
142n/a}
143n/a
144n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
145n/a{
146n/a P->inner_length = inner_length;
147n/a return 0;
148n/a}
149n/a
150n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
151n/a{
152n/a memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
153n/a return 0;
154n/a}
155n/a
156n/aBLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
157n/a{
158n/a memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
159n/a return 0;
160n/a}
161n/a
162n/aBLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S )
163n/a{
164n/a int i;
165n/a memset( S, 0, sizeof( blake2s_state ) );
166n/a
167n/a for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
168n/a
169n/a return 0;
170n/a}
171n/a
172n/a/* init2 xors IV with input parameter block */
173n/aint blake2s_init_param( blake2s_state *S, const blake2s_param *P )
174n/a{
175n/a /*blake2s_init0( S ); */
176n/a const uint8_t * v = ( const uint8_t * )( blake2s_IV );
177n/a const uint8_t * p = ( const uint8_t * )( P );
178n/a uint8_t * h = ( uint8_t * )( S->h );
179n/a int i;
180n/a /* IV XOR ParamBlock */
181n/a memset( S, 0, sizeof( blake2s_state ) );
182n/a
183n/a for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
184n/a
185n/a return 0;
186n/a}
187n/a
188n/a
189n/a/* Some sort of default parameter block initialization, for sequential blake2s */
190n/aint blake2s_init( blake2s_state *S, const uint8_t outlen )
191n/a{
192n/a const blake2s_param P =
193n/a {
194n/a outlen,
195n/a 0,
196n/a 1,
197n/a 1,
198n/a 0,
199n/a {0},
200n/a 0,
201n/a 0,
202n/a {0},
203n/a {0}
204n/a };
205n/a /* Move interval verification here? */
206n/a if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
207n/a return blake2s_init_param( S, &P );
208n/a}
209n/a
210n/a
211n/aint blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
212n/a{
213n/a const blake2s_param P =
214n/a {
215n/a outlen,
216n/a keylen,
217n/a 1,
218n/a 1,
219n/a 0,
220n/a {0},
221n/a 0,
222n/a 0,
223n/a {0},
224n/a {0}
225n/a };
226n/a
227n/a /* Move interval verification here? */
228n/a if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
229n/a
230n/a if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
231n/a
232n/a if( blake2s_init_param( S, &P ) < 0 )
233n/a return -1;
234n/a
235n/a {
236n/a uint8_t block[BLAKE2S_BLOCKBYTES];
237n/a memset( block, 0, BLAKE2S_BLOCKBYTES );
238n/a memcpy( block, key, keylen );
239n/a blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
240n/a secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
241n/a }
242n/a return 0;
243n/a}
244n/a
245n/a
246n/aBLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
247n/a{
248n/a __m128i row1, row2, row3, row4;
249n/a __m128i buf1, buf2, buf3, buf4;
250n/a#if defined(HAVE_SSE41)
251n/a __m128i t0, t1;
252n/a#if !defined(HAVE_XOP)
253n/a __m128i t2;
254n/a#endif
255n/a#endif
256n/a __m128i ff0, ff1;
257n/a#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
258n/a const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
259n/a const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
260n/a#endif
261n/a#if defined(HAVE_SSE41)
262n/a const __m128i m0 = LOADU( block + 00 );
263n/a const __m128i m1 = LOADU( block + 16 );
264n/a const __m128i m2 = LOADU( block + 32 );
265n/a const __m128i m3 = LOADU( block + 48 );
266n/a#else
267n/a const uint32_t m0 = ( ( uint32_t * )block )[ 0];
268n/a const uint32_t m1 = ( ( uint32_t * )block )[ 1];
269n/a const uint32_t m2 = ( ( uint32_t * )block )[ 2];
270n/a const uint32_t m3 = ( ( uint32_t * )block )[ 3];
271n/a const uint32_t m4 = ( ( uint32_t * )block )[ 4];
272n/a const uint32_t m5 = ( ( uint32_t * )block )[ 5];
273n/a const uint32_t m6 = ( ( uint32_t * )block )[ 6];
274n/a const uint32_t m7 = ( ( uint32_t * )block )[ 7];
275n/a const uint32_t m8 = ( ( uint32_t * )block )[ 8];
276n/a const uint32_t m9 = ( ( uint32_t * )block )[ 9];
277n/a const uint32_t m10 = ( ( uint32_t * )block )[10];
278n/a const uint32_t m11 = ( ( uint32_t * )block )[11];
279n/a const uint32_t m12 = ( ( uint32_t * )block )[12];
280n/a const uint32_t m13 = ( ( uint32_t * )block )[13];
281n/a const uint32_t m14 = ( ( uint32_t * )block )[14];
282n/a const uint32_t m15 = ( ( uint32_t * )block )[15];
283n/a#endif
284n/a row1 = ff0 = LOADU( &S->h[0] );
285n/a row2 = ff1 = LOADU( &S->h[4] );
286n/a row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
287n/a row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
288n/a ROUND( 0 );
289n/a ROUND( 1 );
290n/a ROUND( 2 );
291n/a ROUND( 3 );
292n/a ROUND( 4 );
293n/a ROUND( 5 );
294n/a ROUND( 6 );
295n/a ROUND( 7 );
296n/a ROUND( 8 );
297n/a ROUND( 9 );
298n/a STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
299n/a STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
300n/a return 0;
301n/a}
302n/a
303n/a/* inlen now in bytes */
304n/aint blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
305n/a{
306n/a while( inlen > 0 )
307n/a {
308n/a size_t left = S->buflen;
309n/a size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
310n/a
311n/a if( inlen > fill )
312n/a {
313n/a memcpy( S->buf + left, in, fill ); /* Fill buffer */
314n/a S->buflen += fill;
315n/a blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
316n/a blake2s_compress( S, S->buf ); /* Compress */
317n/a memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */
318n/a S->buflen -= BLAKE2S_BLOCKBYTES;
319n/a in += fill;
320n/a inlen -= fill;
321n/a }
322n/a else /* inlen <= fill */
323n/a {
324n/a memcpy( S->buf + left, in, inlen );
325n/a S->buflen += inlen; /* Be lazy, do not compress */
326n/a in += inlen;
327n/a inlen -= inlen;
328n/a }
329n/a }
330n/a
331n/a return 0;
332n/a}
333n/a
334n/a/* Is this correct? */
335n/aint blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
336n/a{
337n/a uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
338n/a int i;
339n/a
340n/a if( outlen > BLAKE2S_OUTBYTES )
341n/a return -1;
342n/a
343n/a if( blake2s_is_lastblock( S ) )
344n/a return -1;
345n/a
346n/a if( S->buflen > BLAKE2S_BLOCKBYTES )
347n/a {
348n/a blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
349n/a blake2s_compress( S, S->buf );
350n/a S->buflen -= BLAKE2S_BLOCKBYTES;
351n/a memmove( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
352n/a }
353n/a
354n/a blake2s_increment_counter( S, ( uint32_t )S->buflen );
355n/a blake2s_set_lastblock( S );
356n/a memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
357n/a blake2s_compress( S, S->buf );
358n/a
359n/a for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
360n/a store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
361n/a
362n/a memcpy( out, buffer, outlen );
363n/a return 0;
364n/a}
365n/a
366n/a/* inlen, at least, should be uint64_t. Others can be size_t. */
367n/aint blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
368n/a{
369n/a blake2s_state S[1];
370n/a
371n/a /* Verify parameters */
372n/a if ( NULL == in && inlen > 0 ) return -1;
373n/a
374n/a if ( NULL == out ) return -1;
375n/a
376n/a if ( NULL == key && keylen > 0) return -1;
377n/a
378n/a if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
379n/a
380n/a if( keylen > BLAKE2S_KEYBYTES ) return -1;
381n/a
382n/a if( keylen > 0 )
383n/a {
384n/a if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
385n/a }
386n/a else
387n/a {
388n/a if( blake2s_init( S, outlen ) < 0 ) return -1;
389n/a }
390n/a
391n/a blake2s_update( S, ( const uint8_t * )in, inlen );
392n/a blake2s_final( S, out, outlen );
393n/a return 0;
394n/a}
395n/a
396n/a#if defined(SUPERCOP)
397n/aint crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
398n/a{
399n/a return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 );
400n/a}
401n/a#endif
402n/a
403n/a#if defined(BLAKE2S_SELFTEST)
404n/a#include <string.h>
405n/a#include "blake2-kat.h"
406n/aint main( int argc, char **argv )
407n/a{
408n/a uint8_t key[BLAKE2S_KEYBYTES];
409n/a uint8_t buf[KAT_LENGTH];
410n/a size_t i;
411n/a
412n/a for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
413n/a key[i] = ( uint8_t )i;
414n/a
415n/a for( i = 0; i < KAT_LENGTH; ++i )
416n/a buf[i] = ( uint8_t )i;
417n/a
418n/a for( i = 0; i < KAT_LENGTH; ++i )
419n/a {
420n/a uint8_t hash[BLAKE2S_OUTBYTES];
421n/a
422n/a if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 ||
423n/a 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
424n/a {
425n/a puts( "error" );
426n/a return -1;
427n/a }
428n/a }
429n/a
430n/a puts( "ok" );
431n/a return 0;
432n/a}
433n/a#endif
434n/a
435n/a