»Core Development>Code coverage>Modules/_decimal/libmpdec/difradix2.c

Python code coverage for Modules/_decimal/libmpdec/difradix2.c

#countcontent
1n/a/*
2n/a * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
3n/a *
4n/a * Redistribution and use in source and binary forms, with or without
5n/a * modification, are permitted provided that the following conditions
6n/a * are met:
7n/a *
8n/a * 1. Redistributions of source code must retain the above copyright
9n/a * notice, this list of conditions and the following disclaimer.
10n/a *
11n/a * 2. Redistributions in binary form must reproduce the above copyright
12n/a * notice, this list of conditions and the following disclaimer in the
13n/a * documentation and/or other materials provided with the distribution.
14n/a *
15n/a * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16n/a * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17n/a * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18n/a * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19n/a * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20n/a * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21n/a * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22n/a * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23n/a * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24n/a * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25n/a * SUCH DAMAGE.
26n/a */
27n/a
28n/a
29n/a#include "mpdecimal.h"
30n/a#include <stdio.h>
31n/a#include <assert.h>
32n/a#include "bits.h"
33n/a#include "numbertheory.h"
34n/a#include "umodarith.h"
35n/a#include "difradix2.h"
36n/a
37n/a
38n/a/* Bignum: The actual transform routine (decimation in frequency). */
39n/a
40n/a
41n/a/*
42n/a * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
43n/a * n must be a power of two.
44n/a * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
45n/a * Chapter 1.14.4. [http://www.jjj.de/fxt/]
46n/a */
47n/astatic inline void
48n/abitreverse_permute(mpd_uint_t a[], mpd_size_t n)
49n/a{
50n/a mpd_size_t x = 0;
51n/a mpd_size_t r = 0;
52n/a mpd_uint_t t;
53n/a
54n/a do { /* Invariant: r = bitreverse(x) */
55n/a if (r > x) {
56n/a t = a[x];
57n/a a[x] = a[r];
58n/a a[r] = t;
59n/a }
60n/a /* Flip trailing consecutive 1 bits and the first zero bit
61n/a * that absorbs a possible carry. */
62n/a x += 1;
63n/a /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
64n/a high bits of r. */
65n/a r ^= (n - (n >> (mpd_bsf(x)+1)));
66n/a /* The loop invariant is preserved. */
67n/a } while (x < n);
68n/a}
69n/a
70n/a
/*
 * Fast Number Theoretic Transform, decimation in frequency.
 *
 *   a       -- input/output array of n coefficients, transformed in place
 *   n       -- transform length; must be a power of two and >= 4
 *   tparams -- modulus selector (modnum) and table of roots of unity (wtable)
 *
 * The DIF butterflies leave the result in bit-reversed order; the final
 * bitreverse_permute() call puts it back into natural order.
 */
void
fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
{
    mpd_uint_t *wtable = tparams->wtable;  /* twiddle factors for this modulus */
    mpd_uint_t umod;      /* written by SETMODULUS, read by addmod/submod */
#ifdef PPRO
    double dmod;          /* PPro float-modulo path: also set by SETMODULUS */
    uint32_t dinvmod[3];  /*   and consumed by the MULMOD* macros */
#endif
    mpd_uint_t u0, u1, v0, v1;    /* butterfly operands */
    mpd_uint_t w, w0, w1, wstep;  /* twiddle factors and wtable stride */
    mpd_size_t m, mhalf;          /* sub-transform size and its half */
    mpd_size_t j, r;


    assert(ispower2(n));
    assert(n >= 4);

    /* Make the modulus (and its platform-specific inverse data) available
       to the addmod/submod/MULMOD* macros through the locals above. */
    SETMODULUS(tparams->modnum);

    /* First stage (m == n): each iteration performs two butterflies with
       consecutive twiddle factors, i.e. the loop is unrolled by two. */
    mhalf = n / 2;
    for (j = 0; j < mhalf; j += 2) {

        w0 = wtable[j];
        w1 = wtable[j+1];

        u0 = a[j];
        v0 = a[j+mhalf];

        u1 = a[j+1];
        v1 = a[j+1+mhalf];

        /* Butterfly: the top half gets the sum ... */
        a[j] = addmod(u0, v0, umod);
        v0 = submod(u0, v0, umod);

        a[j+1] = addmod(u1, v1, umod);
        v1 = submod(u1, v1, umod);

        /* ... the bottom half gets the difference times the twiddle. */
        MULMOD2(&v0, w0, &v1, w1);

        a[j+mhalf] = v0;
        a[j+1+mhalf] = v1;

    }

    /* Remaining stages: halve the sub-transform size m each pass; the
       stride through wtable doubles accordingly. */
    wstep = 2;
    for (m = n/2; m >= 2; m>>=1, wstep<<=1) {

        mhalf = m / 2;

        /* j == 0: the twiddle factor is the identity (presumably
           wtable[0] == 1), so the multiplication is skipped.  Two
           sub-transforms are handled per iteration. */
        for (r = 0; r < n; r += 2*m) {

            u0 = a[r];
            v0 = a[r+mhalf];

            u1 = a[m+r];
            v1 = a[m+r+mhalf];

            a[r] = addmod(u0, v0, umod);
            v0 = submod(u0, v0, umod);

            a[m+r] = addmod(u1, v1, umod);
            v1 = submod(u1, v1, umod);

            a[r+mhalf] = v0;
            a[m+r+mhalf] = v1;
        }

        /* General case: the same twiddle w is reused across all
           sub-transforms (the r loop) before advancing j. */
        for (j = 1; j < mhalf; j++) {

            w = wtable[j*wstep];

            for (r = 0; r < n; r += 2*m) {

                u0 = a[r+j];
                v0 = a[r+j+mhalf];

                u1 = a[m+r+j];
                v1 = a[m+r+j+mhalf];

                a[r+j] = addmod(u0, v0, umod);
                v0 = submod(u0, v0, umod);

                a[m+r+j] = addmod(u1, v1, umod);
                v1 = submod(u1, v1, umod);

                /* Multiply both differences by the shared twiddle factor. */
                MULMOD2C(&v0, &v1, w);

                a[r+j+mhalf] = v0;
                a[m+r+j+mhalf] = v1;
            }

        }

    }

    /* Undo the bit-reversed ordering produced by decimation in frequency. */
    bitreverse_permute(a, n);
}
172n/a
173n/a