-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathSSEdefs.h
82 lines (76 loc) · 2.07 KB
/
SSEdefs.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#ifndef __SSEDEFS_H__
#define __SSEDEFS_H__
#include "cosmoType.h"
/*
 * AVX gate.  A non-zero CMK_USE_AVX is a request for the AVX code path; it
 * is only kept when the compiler also defines __AVX__.  If __AVX__ is
 * missing the request is reset to 0 so the checks further down skip the AVX
 * branch; if it is present a compile-time notice is printed instead.
 */
#if CMK_USE_AVX && !defined(__AVX__)
#undef CMK_USE_AVX
#define CMK_USE_AVX 0
#elif CMK_USE_AVX
#warning "using AVX"
#endif
/*
 * SSE2 gate.  A non-zero CMK_USE_SSE2 request is withdrawn (reset to 0)
 * when the compiler does not define __SSE2__.
 */
#if !defined(__SSE2__) && CMK_USE_SSE2
#undef CMK_USE_SSE2
#define CMK_USE_SSE2 0
#endif
/*
 * CMK_SSE is defined to 1 when either vector code path (SSE2 or AVX) is
 * still requested at this point; it is left undefined otherwise.
 */
#if CMK_USE_SSE2 || CMK_USE_AVX
#define CMK_SSE 1
#endif
#if CMK_USE_AVX
#ifdef COSMO_FLOAT
#error "single-precision AVX is not supported"
#else
#include "SSE-Double.h"
/* AVX, double precision: SSELoad/SSEStore in this branch move four
 * consecutive elements at a time. */
#define SSE_VECTOR_WIDTH 4
/* One less than SSE_VECTOR_WIDTH.
 * NOTE(review): presumably the number of extra entries needed so a list can
 * always be processed in whole vectors -- confirm against the users. */
#define FORCE_INPUT_LIST_PAD 3
/* Vector type used by this branch (COSMO_FLOAT is not defined here). */
typedef SSEDouble SSEcosmoType;
/*
 * SSELoad: gather four consecutive elements and hand them to `where`.
 *   where - name applied to the four values; the expansion has the form
 *           where(v0, v1, v2, v3)
 *   arr   - array or pointer expression to index
 *   idx   - index of the first element; elements idx .. idx+3 are read
 *   field - member-access suffix pasted after every subscript (e.g. .x or
 *           ->x); may be left empty for arrays of scalars
 * arr and idx are parenthesized so that compound arguments such as
 * "base + 1" or "i << 1" keep their meaning.  Both are expanded four times,
 * so they must not have side effects.
 */
#define SSELoad(where, arr, idx, field) \
    where((arr)[(idx)]field, (arr)[(idx) + 1]field, \
          (arr)[(idx) + 2]field, (arr)[(idx) + 3]field)
/*
 * SSEStore: scatter the four values held in `what` to
 * arr[idx]field .. arr[idx+3]field.
 *   what  - vector value; passed once to storeu(), which must be in scope
 *           at the point of use (presumably supplied by SSE-Double.h --
 *           confirm)
 *   arr, idx, field - as for SSELoad; arr and idx are expanded four times
 * The scratch array has a deliberately unusual name so that a caller's own
 * variable (for instance one called `p`) passed as `what` or `arr` is not
 * captured by the macro's local declaration.
 * The expansion is a brace-enclosed block, not a do/while(0) statement, so
 * call sites compile with or without a trailing semicolon.
 */
#define SSEStore(what, arr, idx, field) { \
    double sseStoreTmp_[4]; \
    storeu(sseStoreTmp_, what); \
    (arr)[(idx)]field = sseStoreTmp_[0]; \
    (arr)[(idx) + 1]field = sseStoreTmp_[1]; \
    (arr)[(idx) + 2]field = sseStoreTmp_[2]; \
    (arr)[(idx) + 3]field = sseStoreTmp_[3]; \
}
/* 0xf has the low four bits set, one per element handled above.
 * NOTE(review): presumably compared against a per-lane comparison mask --
 * confirm against the users of cosmoMask. */
enum { cosmoMask = 0xf };
#endif
#elif CMK_USE_SSE2
#ifdef COSMO_FLOAT
#define SSE_COSMO_FLOAT
#if defined(__SSE2__)
#include "SSE-Float.h"
/* SSE2, single precision (COSMO_FLOAT): SSELoad/SSEStore in this branch
 * move four consecutive elements at a time. */
#define SSE_VECTOR_WIDTH 4
/* One less than SSE_VECTOR_WIDTH.
 * NOTE(review): presumably the number of extra entries needed so a list can
 * always be processed in whole vectors -- confirm against the users. */
#define FORCE_INPUT_LIST_PAD 3
/* Vector type used by this branch. */
typedef SSEFloat SSEcosmoType;
/*
 * SSELoad: gather four consecutive elements and hand them to `where`.
 *   where - name applied to the four values; the expansion has the form
 *           where(v0, v1, v2, v3)
 *   arr   - array or pointer expression to index
 *   idx   - index of the first element; elements idx .. idx+3 are read
 *   field - member-access suffix pasted after every subscript (e.g. .x or
 *           ->x); may be left empty for arrays of scalars
 * arr and idx are parenthesized so that compound arguments such as
 * "base + 1" or "i << 1" keep their meaning.  Both are expanded four times,
 * so they must not have side effects.
 */
#define SSELoad(where, arr, idx, field) \
    where((arr)[(idx)]field, (arr)[(idx) + 1]field, \
          (arr)[(idx) + 2]field, (arr)[(idx) + 3]field)
/*
 * SSEStore: scatter the four values held in `what` to
 * arr[idx]field .. arr[idx+3]field.
 *   what  - vector value; passed once to storeu(), which must be in scope
 *           at the point of use (presumably supplied by SSE-Float.h --
 *           confirm)
 *   arr, idx, field - as for SSELoad; arr and idx are expanded four times
 * The scratch array has a deliberately unusual name so that a caller's own
 * variable (for instance one called `p`) passed as `what` or `arr` is not
 * captured by the macro's local declaration.
 * The expansion is a brace-enclosed block, not a do/while(0) statement, so
 * call sites compile with or without a trailing semicolon.
 */
#define SSEStore(what, arr, idx, field) { \
    float sseStoreTmp_[4]; \
    storeu(sseStoreTmp_, what); \
    (arr)[(idx)]field = sseStoreTmp_[0]; \
    (arr)[(idx) + 1]field = sseStoreTmp_[1]; \
    (arr)[(idx) + 2]field = sseStoreTmp_[2]; \
    (arr)[(idx) + 3]field = sseStoreTmp_[3]; \
}
/* 0xf has the low four bits set, one per element handled above.
 * NOTE(review): presumably compared against a per-lane comparison mask --
 * confirm against the users of cosmoMask. */
enum { cosmoMask = 0xf };
#else
#error("SSE not available");
#endif
#else
#if defined(__SSE2__) && !defined(SSE_COSMO_FLOAT)
#include "SSE-Double.h"
/* SSE2, double precision: SSELoad/SSEStore in this branch move two
 * consecutive elements at a time. */
#define SSE_VECTOR_WIDTH 2
/* One less than SSE_VECTOR_WIDTH.
 * NOTE(review): presumably the number of extra entries needed so a list can
 * always be processed in whole vectors -- confirm against the users. */
#define FORCE_INPUT_LIST_PAD 1
/* Vector type used by this branch (COSMO_FLOAT is not defined here). */
typedef SSEDouble SSEcosmoType;
/*
 * SSELoad: gather two consecutive elements and hand them to `where`.
 *   where - name applied to the two values; the expansion has the form
 *           where(v0, v1)
 *   arr   - array or pointer expression to index
 *   idx   - index of the first element; elements idx and idx+1 are read
 *   field - member-access suffix pasted after every subscript (e.g. .x or
 *           ->x); may be left empty for arrays of scalars
 * arr and idx are parenthesized so that compound arguments such as
 * "base + 1" or "i << 1" keep their meaning.  Both are expanded twice, so
 * they must not have side effects.
 */
#define SSELoad(where, arr, idx, field) \
    where((arr)[(idx)]field, (arr)[(idx) + 1]field)
/*
 * SSEStore: write `what` to arr[idx]field via storel() and to
 * arr[idx+1]field via storeh().  Both helpers receive the address of the
 * destination and must be in scope at the point of use (presumably supplied
 * by SSE-Double.h, storing the low and high element respectively --
 * confirm).
 * what, arr and idx are each expanded twice, so they must not have side
 * effects.  The expansion is a brace-enclosed block, not a do/while(0)
 * statement, so call sites compile with or without a trailing semicolon.
 */
#define SSEStore(what, arr, idx, field) { \
    storel(&(arr)[(idx)]field, what); \
    storeh(&(arr)[(idx) + 1]field, what); \
}
/* 0x3 has the low two bits set, one per element handled above.
 * NOTE(review): presumably compared against a per-lane comparison mask --
 * confirm against the users of cosmoMask. */
enum { cosmoMask = 0x3 };
#else
#error("SSE not available");
#endif
#endif
#endif
#endif