
Python code coverage for Modules/_ctypes/libffi/src/aarch64/ffi.c

/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include <stdio.h>

#include <ffi.h>
#include <ffi_common.h>

#include <stdlib.h>

/* Stack alignment requirement in bytes */
#if defined (__APPLE__)
#define AARCH64_STACK_ALIGN 1
#else
#define AARCH64_STACK_ALIGN 16
#endif

#define N_X_ARG_REG 8
#define N_V_ARG_REG 8

#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct call_context
{
  UINT64 x [AARCH64_N_XREG];
  struct
  {
    union _d d[2];
  } v [AARCH64_N_VREG];
};

#if defined (__clang__) && defined (__APPLE__)
extern void
sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

static void *
get_x_addr (struct call_context *context, unsigned n)
{
  return &context->x[n];
}

static void *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1].s[1];
#else
  return &context->v[n].d[0].s[0];
#endif
}

static void *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1];
#else
  return &context->v[n].d[0];
#endif
}

static void *
get_v_addr (struct call_context *context, unsigned n)
{
  return &context->v[n];
}

/* Return the memory location at which a basic type would reside
   were it to have been stored in register n.  */

static void *
get_basic_type_addr (unsigned short type, struct call_context *context,
                     unsigned n)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return get_s_addr (context, n);
    case FFI_TYPE_DOUBLE:
      return get_d_addr (context, n);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return get_v_addr (context, n);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return get_x_addr (context, n);
    case FFI_TYPE_VOID:
      return NULL;
    default:
      FFI_ASSERT (0);
      return NULL;
    }
}

/* Return the alignment width for each of the basic types.  */

static size_t
get_basic_type_alignment (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
#if defined (__APPLE__)
      return sizeof (UINT8);
#endif
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
#if defined (__APPLE__)
      return sizeof (UINT16);
#endif
    case FFI_TYPE_UINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return sizeof (UINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}
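
/* Illustrative sketch (derived from the switch above, not from the
   original sources): because the small-integer cases fall through on
   non-Apple targets, the reported alignments differ per platform, e.g.

     get_basic_type_alignment (FFI_TYPE_UINT8)   == 1 on __APPLE__, 8 elsewhere
     get_basic_type_alignment (FFI_TYPE_SINT16)  == 2 on __APPLE__, 8 elsewhere
     get_basic_type_alignment (FFI_TYPE_POINTER) == 8 everywhere

   Apple's AArch64 PCS packs small stack arguments at their natural C
   alignment, while standard AAPCS64 pads every stack slot to 8 bytes.  */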

/* Return the size in bytes for each of the basic types.  */

static size_t
get_basic_type_size (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return sizeof (UINT32);
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
      return sizeof (UINT8);
    case FFI_TYPE_SINT8:
      return sizeof (SINT8);
    case FFI_TYPE_UINT16:
      return sizeof (UINT16);
    case FFI_TYPE_SINT16:
      return sizeof (SINT16);
    case FFI_TYPE_UINT32:
      return sizeof (UINT32);
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return sizeof (SINT32);
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      return sizeof (UINT64);
    case FFI_TYPE_SINT64:
      return sizeof (SINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
                            extended_cif *),
               struct call_context *context,
               extended_cif *,
               size_t,
               void (*fn)(void));

extern void
ffi_closure_SYSV (ffi_closure *);

/* Test for an FFI floating point representation.  */

static unsigned
is_floating_type (unsigned short type)
{
  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
          || type == FFI_TYPE_LONGDOUBLE);
}

/* Test for a homogeneous structure.  */

static unsigned short
get_homogeneous_type (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned i;
      unsigned short candidate_type
        = get_homogeneous_type (ty->elements[0]);
      for (i = 1; ty->elements[i]; i++)
        {
          unsigned short iteration_type = 0;
          /* If we have a nested struct, we must find its homogeneous type.
             If that fits with our candidate type, we are still
             homogeneous.  */
          if (ty->elements[i]->type == FFI_TYPE_STRUCT
              && ty->elements[i]->elements)
            {
              iteration_type = get_homogeneous_type (ty->elements[i]);
            }
          else
            {
              iteration_type = ty->elements[i]->type;
            }

          /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
          if (candidate_type != iteration_type)
            return FFI_TYPE_STRUCT;
        }
      return candidate_type;
    }

  /* Base case, we have no more levels of nesting, so we
     are a basic type, and so, trivially homogeneous in that type.  */
  return ty->type;
}

/* Determine the number of elements within a STRUCT.

   Note, we must handle nested structs.

   If ty is not a STRUCT this function will return 0.  */

static unsigned
element_count (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned n;
      unsigned elems = 0;
      for (n = 0; ty->elements[n]; n++)
        {
          if (ty->elements[n]->type == FFI_TYPE_STRUCT
              && ty->elements[n]->elements)
            elems += element_count (ty->elements[n]);
          else
            elems++;
        }
      return elems;
    }
  return 0;
}
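
/* Illustrative sketch, following the two helpers above: given

     typedef struct { float x, y; } point;
     typedef struct { point p; float z; } point3;

   and ffi_type descriptions of them built from FFI_TYPE_STRUCT
   elements, element_count () flattens the nesting to 3, and
   get_homogeneous_type () reports FFI_TYPE_FLOAT for both.  */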

/* Test for a homogeneous floating point aggregate.

   A homogeneous floating point aggregate is a homogeneous aggregate of
   a half-, single-, or double-precision floating point type with one
   to four elements.  Note that this includes nested structs of the
   basic type.  */

static int
is_hfa (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT
      && ty->elements[0]
      && is_floating_type (get_homogeneous_type (ty)))
    {
      unsigned n = element_count (ty);
      return n >= 1 && n <= 4;
    }
  return 0;
}
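
/* Illustrative sketch of is_hfa () on some example aggregates,
   assuming each is described by a matching FFI_TYPE_STRUCT:

     struct { float a, b; }          -- HFA, two single-precision elements
     struct { double a, b, c, d; }   -- HFA, four double-precision elements
     struct { double a; float b; }   -- not an HFA (mixed base types)
     five or more doubles            -- not an HFA (element count > 4)  */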

/* Test if an ffi_type is a candidate for passing in a register.

   This test does not check that sufficient registers of the
   appropriate class are actually available, merely that IFF
   sufficient registers are available then the argument will be passed
   in register(s).

   Note that an ffi_type that is deemed to be a register candidate
   will always be returned in registers.

   Returns 1 if a register candidate else 0.  */

static int
is_register_candidate (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_VOID:
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT64:
      return 1;

    case FFI_TYPE_STRUCT:
      if (is_hfa (ty))
        {
          return 1;
        }
      else if (ty->size > 16)
        {
          /* Too large.  Will be replaced with a pointer to memory.  The
             pointer MAY be passed in a register, but the value will
             not.  This test specifically fails since the argument will
             never be passed by value in registers.  */
          return 0;
        }
      else
        {
          /* Might be passed in registers depending on the number of
             registers required.  */
          return (ty->size + 7) / 8 < N_X_ARG_REG;
        }
      break;

    default:
      FFI_ASSERT (0);
      break;
    }

  return 0;
}

/* Test if an ffi_type argument or result is a candidate for a vector
   register.  */

static int
is_v_register_candidate (ffi_type *ty)
{
  return is_floating_type (ty->type)
           || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;

#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Return the number of available consecutive core argument
   registers.  */

static unsigned
available_x (struct arg_state *state)
{
  return N_X_ARG_REG - state->ngrn;
}

/* Return the number of available consecutive vector argument
   registers.  */

static unsigned
available_v (struct arg_state *state)
{
  return N_V_ARG_REG - state->nsrn;
}

static void *
allocate_to_x (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
  return get_x_addr (context, (state->ngrn)++);
}

static void *
allocate_to_s (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_s_addr (context, (state->nsrn)++);
}

static void *
allocate_to_d (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_d_addr (context, (state->nsrn)++);
}

static void *
allocate_to_v (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_v_addr (context, (state->nsrn)++);
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
                   size_t size)
{
  void *allocation;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
  state->nsaa = ALIGN (state->nsaa, alignment);
#if defined (__APPLE__)
  if (state->allocating_variadic)
    state->nsaa = ALIGN (state->nsaa, 8);
#else
  state->nsaa = ALIGN (state->nsaa, 8);
#endif

  allocation = stack + state->nsaa;

  state->nsaa += size;
  return allocation;
}
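
/* Illustrative sketch: two consecutive one-byte arguments that spill
   to the stack land at offsets 0 and 8 under AAPCS64 (each slot is
   padded to 8 bytes by the second ALIGN above), but at offsets 0 and
   1 under the Apple ABI, where only variadic arguments are padded:

     allocate_to_stack (&state, stack, 1, 1);   -- offset 0
     allocate_to_stack (&state, stack, 1, 1);   -- offset 8 (AAPCS64), 1 (Apple)
*/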

static void
copy_basic_type (void *dest, void *source, unsigned short type)
{
  /* This is necessary to ensure that basic types are copied
     sign extended to 64-bits as libffi expects.  */
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      *(float *) dest = *(float *) source;
      break;
    case FFI_TYPE_DOUBLE:
      *(double *) dest = *(double *) source;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      *(long double *) dest = *(long double *) source;
      break;
#endif
    case FFI_TYPE_UINT8:
      *(ffi_arg *) dest = *(UINT8 *) source;
      break;
    case FFI_TYPE_SINT8:
      *(ffi_sarg *) dest = *(SINT8 *) source;
      break;
    case FFI_TYPE_UINT16:
      *(ffi_arg *) dest = *(UINT16 *) source;
      break;
    case FFI_TYPE_SINT16:
      *(ffi_sarg *) dest = *(SINT16 *) source;
      break;
    case FFI_TYPE_UINT32:
      *(ffi_arg *) dest = *(UINT32 *) source;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      *(ffi_sarg *) dest = *(SINT32 *) source;
      break;
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      *(ffi_arg *) dest = *(UINT64 *) source;
      break;
    case FFI_TYPE_SINT64:
      *(ffi_sarg *) dest = *(SINT64 *) source;
      break;
    case FFI_TYPE_VOID:
      break;

    default:
      FFI_ASSERT (0);
    }
}
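
/* Illustrative sketch: copy_basic_type () is what gives libffi its
   "everything widens to 64 bits" behaviour on this port.  Assuming
   dest points at an 8-byte register slot in the call context:

     SINT8 value -1    -- slot becomes 0xffffffffffffffff (sign-extended)
     UINT8 value 0xff  -- slot becomes 0x00000000000000ff (zero-extended)  */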

static void
copy_hfa_to_reg_or_stack (void *memory,
                          ffi_type *ty,
                          struct call_context *context,
                          unsigned char *stack,
                          struct arg_state *state)
{
  unsigned elems = element_count (ty);
  if (available_v (state) < elems)
    {
      /* There are insufficient V registers.  Further V register allocations
         are prevented, the NSAA is adjusted (by allocate_to_stack ())
         and the argument is copied to memory at the adjusted NSAA.  */
      state->nsrn = N_V_ARG_REG;
      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
              memory,
              ty->size);
    }
  else
    {
      int i;
      unsigned short type = get_homogeneous_type (ty);
      for (i = 0; i < elems; i++)
        {
          void *reg = allocate_to_v (context, state);
          copy_basic_type (reg, memory, type);
          memory += get_basic_type_size (type);
        }
    }
}

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_to_register_or_stack (struct call_context *context,
                               unsigned char *stack,
                               struct arg_state *state,
                               unsigned short type)
{
  size_t alignment = get_basic_type_alignment (type);
  size_t size = alignment;
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      /* This is the only case for which the allocated stack size
         should not match the alignment of the type.  */
      size = sizeof (UINT32);
      /* Fall through.  */
    case FFI_TYPE_DOUBLE:
      if (state->nsrn < N_V_ARG_REG)
        return allocate_to_d (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if (state->nsrn < N_V_ARG_REG)
        return allocate_to_v (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      if (state->ngrn < N_X_ARG_REG)
        return allocate_to_x (context, state);
      state->ngrn = N_X_ARG_REG;
      break;
    default:
      FFI_ASSERT (0);
    }

  return allocate_to_stack (state, stack, alignment, size);
}

/* Copy a value to an appropriate register, or if none are
   available, to the stack.  */

static void
copy_to_register_or_stack (struct call_context *context,
                           unsigned char *stack,
                           struct arg_state *state,
                           void *value,
                           unsigned short type)
{
  copy_basic_type (
          allocate_to_register_or_stack (context, stack, state, type),
          value,
          type);
}

/* Marshall the arguments from FFI representation to procedure call
   context and stack.  */

static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
                   extended_cif *ecif)
{
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(ecif->cif->bytes, 16));

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      ffi_type *ty = ecif->cif->arg_types[i];
      switch (ty->type)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        /* If the argument is a basic type the argument is allocated to an
           appropriate register, or if none are available, to the stack.  */
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
          copy_to_register_or_stack (context, stack, &state,
                                     ecif->avalue[i], ty->type);
          break;

        case FFI_TYPE_STRUCT:
          if (is_hfa (ty))
            {
              copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
                                        stack, &state);
            }
          else if (ty->size > 16)
            {
              /* If the argument is a composite type that is larger than 16
                 bytes, then the argument has been copied to memory, and
                 the argument is replaced by a pointer to the copy.  */

              copy_to_register_or_stack (context, stack, &state,
                                         &(ecif->avalue[i]), FFI_TYPE_POINTER);
            }
          else if (available_x (&state) >= (ty->size + 7) / 8)
            {
              /* If the argument is a composite type and the size in
                 double-words is not more than the number of available
                 X registers, then the argument is copied into consecutive
                 X registers.  */
              int j;
              for (j = 0; j < (ty->size + 7) / 8; j++)
                {
                  memcpy (allocate_to_x (context, &state),
                          &(((UINT64 *) ecif->avalue[i])[j]),
                          sizeof (UINT64));
                }
            }
          else
            {
              /* Otherwise, there are insufficient X registers.  Further X
                 register allocations are prevented, the NSAA is adjusted
                 (by allocate_to_stack ()) and the argument is copied to
                 memory at the adjusted NSAA.  */
              state.ngrn = N_X_ARG_REG;

              memcpy (allocate_to_stack (&state, stack, ty->alignment,
                                         ty->size), ecif->avalue[i], ty->size);
            }
          break;

        default:
          FFI_ASSERT (0);
          break;
        }

#if defined (__APPLE__)
      if (i + 1 == ecif->cif->aarch64_nfixedargs)
        {
          state.ngrn = N_X_ARG_REG;
          state.nsrn = N_V_ARG_REG;

          state.allocating_variadic = 1;
        }
#endif
    }

  return ecif->cif->aarch64_flags;
}
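
/* Illustrative sketch: for a call such as

     double f (int a, double b, struct { float x, y; } c, void *d);

   the loop above allocates a -> x0, b -> d0, c -> s1 and s2 (an HFA
   of two floats, one V register per element), and d -> x1; the NSAA
   only starts to advance once the eight X or eight V argument
   registers are exhausted.  */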

ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes =
    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);

  /* Initialize our flags.  We care whether this CIF will touch a
     vector register; if it will, we enable context save and load for
     those registers, otherwise we skip it.  This is intended to be
     friendly to lazy float context switching in the kernel.  */
  cif->aarch64_flags = 0;

  if (is_v_register_candidate (cif->rtype))
    {
      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
    }
  else
    {
      int i;
      for (i = 0; i < cif->nargs; i++)
        if (is_v_register_candidate (cif->arg_types[i]))
          {
            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
            break;
          }
    }

  return FFI_OK;
}

#if defined (__APPLE__)

/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
                                    unsigned int nfixedargs,
                                    unsigned int ntotalargs)
{
  cif->aarch64_nfixedargs = nfixedargs;

  return ffi_prep_cif_machdep(cif);
}

#endif

/* Call a function with the provided arguments and capture the return
   value.  */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
  extended_cif ecif;

  ecif.cif = cif;
  ecif.avalue = avalue;
  ecif.rvalue = rvalue;

  switch (cif->abi)
    {
    case FFI_SYSV:
      {
        struct call_context context;
        size_t stack_bytes;

        /* Figure out the total amount of stack space we need; the
           call frame space needs to be 16-byte aligned to ensure
           correct alignment of the first object placed in it, hence
           the ALIGN applied to cif->bytes.  */
        stack_bytes = ALIGN(cif->bytes, 16);

        memset (&context, 0, sizeof (context));
        if (is_register_candidate (cif->rtype))
          {
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
            switch (cif->rtype->type)
              {
              case FFI_TYPE_VOID:
              case FFI_TYPE_FLOAT:
              case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
              case FFI_TYPE_LONGDOUBLE:
#endif
              case FFI_TYPE_UINT8:
              case FFI_TYPE_SINT8:
              case FFI_TYPE_UINT16:
              case FFI_TYPE_SINT16:
              case FFI_TYPE_UINT32:
              case FFI_TYPE_SINT32:
              case FFI_TYPE_POINTER:
              case FFI_TYPE_UINT64:
              case FFI_TYPE_INT:
              case FFI_TYPE_SINT64:
                {
                  void *addr = get_basic_type_addr (cif->rtype->type,
                                                    &context, 0);
                  copy_basic_type (rvalue, addr, cif->rtype->type);
                  break;
                }

              case FFI_TYPE_STRUCT:
                if (is_hfa (cif->rtype))
                  {
                    int j;
                    unsigned short type = get_homogeneous_type (cif->rtype);
                    unsigned elems = element_count (cif->rtype);
                    for (j = 0; j < elems; j++)
                      {
                        void *reg = get_basic_type_addr (type, &context, j);
                        copy_basic_type (rvalue, reg, type);
                        rvalue += get_basic_type_size (type);
                      }
                  }
                else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
                  {
                    size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
                    memcpy (rvalue, get_x_addr (&context, 0), size);
                  }
                else
                  {
                    FFI_ASSERT (0);
                  }
                break;

              default:
                FFI_ASSERT (0);
                break;
              }
          }
        else
          {
            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
            ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
                           stack_bytes, fn);
          }
        break;
      }

    default:
      FFI_ASSERT (0);
      break;
    }
}
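
/* Usage sketch (assuming only the public libffi API from ffi.h):
   driving the ffi_call () entry point above.

     #include <ffi.h>

     static int add (int a, int b) { return a + b; }

     int call_add (void)
     {
       ffi_cif cif;
       ffi_type *args[2] = { &ffi_type_sint32, &ffi_type_sint32 };
       int a = 2, b = 3;
       void *avalue[2] = { &a, &b };
       ffi_arg result = 0;

       if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2,
                         &ffi_type_sint32, args) == FFI_OK)
         ffi_call (&cif, FFI_FN (add), &result, avalue);
       return (int) result;   // 5
     }

   ffi_prep_cif () invokes ffi_prep_cif_machdep () above, and the
   32-bit result comes back widened to a full ffi_arg, as handled by
   copy_basic_type ().  */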

static unsigned char trampoline [] =
{ 0x70, 0x00, 0x00, 0x58,       /* ldr  x16, 1f  */
  0x91, 0x00, 0x00, 0x10,       /* adr  x17, 2f  */
  0x00, 0x02, 0x1f, 0xd6        /* br   x16      */
};

/* Build a trampoline.  */

#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)                        \
  ({unsigned char *__tramp = (unsigned char*)(TRAMP);                   \
    UINT64 __fun = (UINT64)(FUN);                                       \
    UINT64 __ctx = (UINT64)(CTX);                                       \
    UINT64 __flags = (UINT64)(FLAGS);                                   \
    memcpy (__tramp, trampoline, sizeof (trampoline));                  \
    memcpy (__tramp + 12, &__fun, sizeof (__fun));                      \
    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));                      \
    memcpy (__tramp + 28, &__flags, sizeof (__flags));                  \
    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);            \
  })
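
/* Layout sketch (decoded from the instruction words and offsets above):
   after FFI_INIT_TRAMPOLINE the trampoline area holds

     offset  0:  ldr x16, #12    -- load the 8-byte FUN literal
     offset  4:  adr x17, #16    -- x17 = address of the CTX literal
     offset  8:  br  x16
     offset 12:  FUN   (&ffi_closure_SYSV)
     offset 20:  CTX   (codeloc, i.e. the closure)
     offset 28:  FLAGS (cif->aarch64_flags)

   so the assembler entry point can recover the closure pointer and
   flags through x17 before transferring control.  */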

ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
                       cif->aarch64_flags);

  closure->cif = cif;
  closure->user_data = user_data;
  closure->fun = fun;

  return FFI_OK;
}
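
/* Usage sketch (assuming the public libffi closure API,
   ffi_closure_alloc / ffi_closure_free): wrapping a C callback as an
   int (*)(int) through ffi_prep_closure_loc () above.

     #include <ffi.h>

     static void handler (ffi_cif *cif, void *ret, void **args,
                          void *user_data)
     {
       *(ffi_arg *) ret = *(int *) args[0] + 1;
     }

     static ffi_cif cif;
     static ffi_type *args[1] = { &ffi_type_sint32 };

     int (*make_incr (void)) (int)
     {
       void *codeloc;
       ffi_closure *closure
         = ffi_closure_alloc (sizeof (ffi_closure), &codeloc);

       if (closure
           && ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                            &ffi_type_sint32, args) == FFI_OK
           && ffi_prep_closure_loc (closure, &cif, handler,
                                    NULL, codeloc) == FFI_OK)
         return (int (*)(int)) codeloc;
       return NULL;
     }

   Invoking the returned pointer enters through the trampoline built
   above, then ffi_closure_SYSV and ffi_closure_SYSV_inner () below.  */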

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshalls the call context into ffi value
   descriptors, invokes the wrapped function, then marshalls the return
   value back into the call context.  */

void FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
                        void *stack)
{
  ffi_cif *cif = closure->cif;
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  void *rvalue = NULL;
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(cif->bytes, 16));

  for (i = 0; i < cif->nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];

      switch (ty->type)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_INT:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
          avalue[i] = allocate_to_register_or_stack (context, stack,
                                                     &state, ty->type);
          break;

        case FFI_TYPE_STRUCT:
          if (is_hfa (ty))
            {
              unsigned n = element_count (ty);
              if (available_v (&state) < n)
                {
                  state.nsrn = N_V_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
                                                 ty->size);
                }
              else
                {
                  switch (get_homogeneous_type (ty))
                    {
                    case FFI_TYPE_FLOAT:
                      {
                        /* Eeek! We need a pointer to the structure,
                           however the homogeneous float elements are
                           being passed in individual S registers,
                           therefore the structure is not represented as
                           a contiguous sequence of bytes in our saved
                           register context.  We need to fake up a copy
                           of the structure laid out in memory
                           correctly.  The fake can be tossed once the
                           closure function has returned hence alloca()
                           is sufficient.  */
                        int j;
                        UINT32 *p = avalue[i] = alloca (ty->size);
                        for (j = 0; j < element_count (ty); j++)
                          memcpy (&p[j],
                                  allocate_to_s (context, &state),
                                  sizeof (*p));
                        break;
                      }

                    case FFI_TYPE_DOUBLE:
                      {
                        /* Eeek! We need a pointer to the structure,
                           however the homogeneous double elements are
                           being passed in individual D registers,
                           therefore the structure is not represented as
                           a contiguous sequence of bytes in our saved
                           register context.  We need to fake up a copy
                           of the structure laid out in memory
                           correctly.  The fake can be tossed once the
                           closure function has returned hence alloca()
                           is sufficient.  */
                        int j;
                        UINT64 *p = avalue[i] = alloca (ty->size);
                        for (j = 0; j < element_count (ty); j++)
                          memcpy (&p[j],
                                  allocate_to_d (context, &state),
                                  sizeof (*p));
                        break;
                      }

#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
                    case FFI_TYPE_LONGDOUBLE:
                      memcpy (&avalue[i],
                              allocate_to_v (context, &state),
                              sizeof (*avalue));
                      break;
#endif

                    default:
                      FFI_ASSERT (0);
                      break;
                    }
                }
            }
          else if (ty->size > 16)
            {
              /* Replace composite types of size greater than 16 with a
                 pointer.  */
              memcpy (&avalue[i],
                      allocate_to_register_or_stack (context, stack,
                                                     &state, FFI_TYPE_POINTER),
                      sizeof (avalue[i]));
            }
          else if (available_x (&state) >= (ty->size + 7) / 8)
            {
              avalue[i] = get_x_addr (context, state.ngrn);
              state.ngrn += (ty->size + 7) / 8;
            }
          else
            {
              state.ngrn = N_X_ARG_REG;

              avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
                                             ty->size);
            }
          break;

        default:
          FFI_ASSERT (0);
          break;
        }
    }

  /* Figure out where the return value will be passed, either in
     registers or in a memory block allocated by the caller and passed
     in x8.  */

  if (is_register_candidate (cif->rtype))
    {
      /* Register candidates are *always* returned in registers.  */

      /* Allocate a scratchpad for the return value; we will let the
         callee scribble the result into the scratchpad, then move the
         contents into the appropriate return value location for the
         calling convention.  */
      rvalue = alloca (cif->rtype->size);
      (closure->fun) (cif, rvalue, avalue, closure->user_data);

      /* Copy the return value into the call context so that it is returned
         as expected to our caller.  */
      switch (cif->rtype->type)
        {
        case FFI_TYPE_VOID:
          break;

        case FFI_TYPE_UINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
        case FFI_TYPE_LONGDOUBLE:
#endif
          {
            void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
            copy_basic_type (addr, rvalue, cif->rtype->type);
            break;
          }
        case FFI_TYPE_STRUCT:
          if (is_hfa (cif->rtype))
            {
              int j;
              unsigned short type = get_homogeneous_type (cif->rtype);
              unsigned elems = element_count (cif->rtype);
              for (j = 0; j < elems; j++)
                {
                  void *reg = get_basic_type_addr (type, context, j);
                  copy_basic_type (reg, rvalue, type);
                  rvalue += get_basic_type_size (type);
                }
            }
          else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
            {
              size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
              memcpy (get_x_addr (context, 0), rvalue, size);
            }
          else
            {
              FFI_ASSERT (0);
            }
          break;
        default:
          FFI_ASSERT (0);
          break;
        }
    }
  else
    {
      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
      (closure->fun) (cif, rvalue, avalue, closure->user_data);
    }
}