ยปCore Development>Code coverage>Modules/_decimal/libmpdec/transpose.c

Python code coverage for Modules/_decimal/libmpdec/transpose.c

#countcontent
1n/a/*
2n/a * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
3n/a *
4n/a * Redistribution and use in source and binary forms, with or without
5n/a * modification, are permitted provided that the following conditions
6n/a * are met:
7n/a *
8n/a * 1. Redistributions of source code must retain the above copyright
9n/a * notice, this list of conditions and the following disclaimer.
10n/a *
11n/a * 2. Redistributions in binary form must reproduce the above copyright
12n/a * notice, this list of conditions and the following disclaimer in the
13n/a * documentation and/or other materials provided with the distribution.
14n/a *
15n/a * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16n/a * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17n/a * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18n/a * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19n/a * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20n/a * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21n/a * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22n/a * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23n/a * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24n/a * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25n/a * SUCH DAMAGE.
26n/a */
27n/a
28n/a
29n/a#include "mpdecimal.h"
30n/a#include <stdio.h>
31n/a#include <stdlib.h>
32n/a#include <string.h>
33n/a#include <limits.h>
34n/a#include <assert.h>
35n/a#include "bits.h"
36n/a#include "constants.h"
37n/a#include "typearith.h"
38n/a#include "transpose.h"
39n/a
40n/a
41n/a#define BUFSIZE 4096
42n/a#define SIDE 128
43n/a
44n/a
45n/a/* Bignum: The transpose functions are used for very large transforms
46n/a in sixstep.c and fourstep.c. */
47n/a
48n/a
49n/a/* Definition of the matrix transpose */
50n/avoid
51n/astd_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
52n/a{
53n/a mpd_size_t idest, isrc;
54n/a mpd_size_t r, c;
55n/a
56n/a for (r = 0; r < rows; r++) {
57n/a isrc = r * cols;
58n/a idest = r;
59n/a for (c = 0; c < cols; c++) {
60n/a dest[idest] = src[isrc];
61n/a isrc += 1;
62n/a idest += rows;
63n/a }
64n/a }
65n/a}
66n/a
67n/a/*
68n/a * Swap half-rows of 2^n * (2*2^n) matrix.
69n/a * FORWARD_CYCLE: even/odd permutation of the halfrows.
70n/a * BACKWARD_CYCLE: reverse the even/odd permutation.
71n/a */
72n/astatic int
73n/aswap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
74n/a{
75n/a mpd_uint_t buf1[BUFSIZE];
76n/a mpd_uint_t buf2[BUFSIZE];
77n/a mpd_uint_t *readbuf, *writebuf, *hp;
78n/a mpd_size_t *done, dbits;
79n/a mpd_size_t b = BUFSIZE, stride;
80n/a mpd_size_t hn, hmax; /* halfrow number */
81n/a mpd_size_t m, r=0;
82n/a mpd_size_t offset;
83n/a mpd_size_t next;
84n/a
85n/a
86n/a assert(cols == mul_size_t(2, rows));
87n/a
88n/a if (dir == FORWARD_CYCLE) {
89n/a r = rows;
90n/a }
91n/a else if (dir == BACKWARD_CYCLE) {
92n/a r = 2;
93n/a }
94n/a else {
95n/a abort(); /* GCOV_NOT_REACHED */
96n/a }
97n/a
98n/a m = cols - 1;
99n/a hmax = rows; /* cycles start at odd halfrows */
100n/a dbits = 8 * sizeof *done;
101n/a if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
102n/a return 0;
103n/a }
104n/a
105n/a for (hn = 1; hn <= hmax; hn += 2) {
106n/a
107n/a if (done[hn/dbits] & mpd_bits[hn%dbits]) {
108n/a continue;
109n/a }
110n/a
111n/a readbuf = buf1; writebuf = buf2;
112n/a
113n/a for (offset = 0; offset < cols/2; offset += b) {
114n/a
115n/a stride = (offset + b < cols/2) ? b : cols/2-offset;
116n/a
117n/a hp = matrix + hn*cols/2;
118n/a memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
119n/a pointerswap(&readbuf, &writebuf);
120n/a
121n/a next = mulmod_size_t(hn, r, m);
122n/a hp = matrix + next*cols/2;
123n/a
124n/a while (next != hn) {
125n/a
126n/a memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
127n/a memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
128n/a pointerswap(&readbuf, &writebuf);
129n/a
130n/a done[next/dbits] |= mpd_bits[next%dbits];
131n/a
132n/a next = mulmod_size_t(next, r, m);
133n/a hp = matrix + next*cols/2;
134n/a
135n/a }
136n/a
137n/a memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
138n/a
139n/a done[hn/dbits] |= mpd_bits[hn%dbits];
140n/a }
141n/a }
142n/a
143n/a mpd_free(done);
144n/a return 1;
145n/a}
146n/a
147n/a/* In-place transpose of a square matrix */
148n/astatic inline void
149n/asquaretrans(mpd_uint_t *buf, mpd_size_t cols)
150n/a{
151n/a mpd_uint_t tmp;
152n/a mpd_size_t idest, isrc;
153n/a mpd_size_t r, c;
154n/a
155n/a for (r = 0; r < cols; r++) {
156n/a c = r+1;
157n/a isrc = r*cols + c;
158n/a idest = c*cols + r;
159n/a for (c = r+1; c < cols; c++) {
160n/a tmp = buf[isrc];
161n/a buf[isrc] = buf[idest];
162n/a buf[idest] = tmp;
163n/a isrc += 1;
164n/a idest += cols;
165n/a }
166n/a }
167n/a}
168n/a
169n/a/*
170n/a * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into
171n/a * square blocks with side length 'SIDE'. First, the blocks are transposed,
172n/a * then a square transposition is done on each individual block.
173n/a */
174n/astatic void
175n/asquaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
176n/a{
177n/a mpd_uint_t buf1[SIDE*SIDE];
178n/a mpd_uint_t buf2[SIDE*SIDE];
179n/a mpd_uint_t *to, *from;
180n/a mpd_size_t b = size;
181n/a mpd_size_t r, c;
182n/a mpd_size_t i;
183n/a
184n/a while (b > SIDE) b >>= 1;
185n/a
186n/a for (r = 0; r < size; r += b) {
187n/a
188n/a for (c = r; c < size; c += b) {
189n/a
190n/a from = matrix + r*size + c;
191n/a to = buf1;
192n/a for (i = 0; i < b; i++) {
193n/a memcpy(to, from, b*(sizeof *to));
194n/a from += size;
195n/a to += b;
196n/a }
197n/a squaretrans(buf1, b);
198n/a
199n/a if (r == c) {
200n/a to = matrix + r*size + c;
201n/a from = buf1;
202n/a for (i = 0; i < b; i++) {
203n/a memcpy(to, from, b*(sizeof *to));
204n/a from += b;
205n/a to += size;
206n/a }
207n/a continue;
208n/a }
209n/a else {
210n/a from = matrix + c*size + r;
211n/a to = buf2;
212n/a for (i = 0; i < b; i++) {
213n/a memcpy(to, from, b*(sizeof *to));
214n/a from += size;
215n/a to += b;
216n/a }
217n/a squaretrans(buf2, b);
218n/a
219n/a to = matrix + c*size + r;
220n/a from = buf1;
221n/a for (i = 0; i < b; i++) {
222n/a memcpy(to, from, b*(sizeof *to));
223n/a from += b;
224n/a to += size;
225n/a }
226n/a
227n/a to = matrix + r*size + c;
228n/a from = buf2;
229n/a for (i = 0; i < b; i++) {
230n/a memcpy(to, from, b*(sizeof *to));
231n/a from += b;
232n/a to += size;
233n/a }
234n/a }
235n/a }
236n/a }
237n/a
238n/a}
239n/a
240n/a/*
241n/a * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n)
242n/a * or a (2*2^n) x 2^n matrix.
243n/a */
244n/aint
245n/atranspose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
246n/a{
247n/a mpd_size_t size = mul_size_t(rows, cols);
248n/a
249n/a assert(ispower2(rows));
250n/a assert(ispower2(cols));
251n/a
252n/a if (cols == rows) {
253n/a squaretrans_pow2(matrix, rows);
254n/a }
255n/a else if (cols == mul_size_t(2, rows)) {
256n/a if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
257n/a return 0;
258n/a }
259n/a squaretrans_pow2(matrix, rows);
260n/a squaretrans_pow2(matrix+(size/2), rows);
261n/a }
262n/a else if (rows == mul_size_t(2, cols)) {
263n/a squaretrans_pow2(matrix, cols);
264n/a squaretrans_pow2(matrix+(size/2), cols);
265n/a if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
266n/a return 0;
267n/a }
268n/a }
269n/a else {
270n/a abort(); /* GCOV_NOT_REACHED */
271n/a }
272n/a
273n/a return 1;
274n/a}
275n/a
276n/a