-
Notifications
You must be signed in to change notification settings - Fork 104
/
Copy pathlattice_field.h
1009 lines (819 loc) · 29.3 KB
/
lattice_field.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
#include <iostream>
#include <quda_internal.h>
#include <comm_quda.h>
#include <util_quda.h>
#include <object.h>
#include <quda_api.h>
#include <reference_wrapper_helper.h>
/**
* @file lattice_field.h
*
* @section DESCRIPTION
*
* LatticeField is an abstract base class for all Field objects.
*/
namespace quda {
// LatticeField is an abstract base class for all Field objects.
// Forward declaration of all children
class LatticeField;
class ColorSpinorField;
class EigValueSet;
class cudaEigValueSet;
class cpuEigValueSet;
class EigVecSet;
class cpuEigVecSet;
class cudaEigVecSet;
class GaugeField;
class CloverField;
/**
   @brief Mode selector with two values, COLLECT and DISPERSE.
   NOTE(review): the precise meaning of each mode is defined by the
   code that consumes this enum, which is not visible in this header
   section - confirm against the offset-copy routines.
*/
enum class QudaOffsetCopyMode {
  COLLECT,
  DISPERSE
};
struct LatticeFieldParam {
friend class LatticeField;
/** Location of the field */
QudaFieldLocation location = QUDA_INVALID_FIELD_LOCATION;
protected:
/** Field precision */
QudaPrecision precision = QUDA_INVALID_PRECISION;
/** Ghost precision */
QudaPrecision ghost_precision = QUDA_INVALID_PRECISION;
public:
/** Field precision */
QudaPrecision Precision() const { return precision; }
/** Ghost precision */
QudaPrecision GhostPrecision() const { return ghost_precision; }
/** indicate if the param has been initialized (created with a non trivial constructor) */
bool init = false;
/** Number of field dimensions */
int nDim = 4;
/** Array storing the length of dimension */
lat_dim_t x = {};
/** Padding to be added to the checker-boarded volume (only for native field ordering) */
int pad = 0;
/** Whether the field is full or single parity */
QudaSiteSubset siteSubset = QUDA_INVALID_SITE_SUBSET;
/** The type of memory allocation to use for the field */
QudaMemoryType mem_type = QUDA_MEMORY_INVALID;
/** The type of ghost exchange to be done with this field */
QudaGhostExchange ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
/** The extended field radius (if applicable) */
lat_dim_t r = {};
/** For fixed-point fields that need a global scaling factor */
double scale = 1.0;
/**
@brief Default constructor for LatticeFieldParam
*/
LatticeFieldParam() = default;
/**
@brief Constructor for creating a LatticeFieldParam from a set of parameters
@param[in] nDim Number of field dimensions
@param[in] x Array of dimension lengths
@param[in] pad Field padding
@param[in] precision Field Precision
@param[in] ghostExchange Type of ghost exchange
*/
LatticeFieldParam(int nDim, const lat_dim_t &x, int pad, QudaFieldLocation location, QudaPrecision precision,
QudaGhostExchange ghostExchange = QUDA_GHOST_EXCHANGE_PAD) :
location(location),
precision(precision),
ghost_precision(precision),
init(true),
nDim(nDim),
pad(pad),
siteSubset(QUDA_FULL_SITE_SUBSET),
mem_type(location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_HOST),
ghostExchange(ghostExchange),
scale(1.0)
{
if (nDim > QUDA_MAX_DIM) errorQuda("Number of dimensions too great");
for (int i = 0; i < QUDA_MAX_DIM; i++) {
this->x[i] = i < nDim ? x[i] : 0;
this->r[i] = 0;
}
}
/**
@brief Constructor for creating a LatticeFieldParam from a
QudaGaugeParam. Used for wrapping around a CPU reference
field.
@param[in] param Contains the metadata for filling out the LatticeFieldParam
*/
LatticeFieldParam(const QudaGaugeParam ¶m) :
location(param.location),
precision(param.cpu_prec),
ghost_precision(param.cpu_prec),
init(true),
nDim(4),
pad(0),
siteSubset(QUDA_FULL_SITE_SUBSET),
mem_type(QUDA_MEMORY_HOST),
ghostExchange(QUDA_GHOST_EXCHANGE_NO),
scale(param.scale)
{
for (int i = 0; i < QUDA_MAX_DIM; i++) {
this->x[i] = i < nDim ? param.X[i] : 0;
this->r[i] = 0;
}
}
/**
@brief Constructor for creating LatticeFieldParam from a LatticeField
*/
LatticeFieldParam(const LatticeField &field);
};
std::ostream& operator<<(std::ostream& output, const LatticeFieldParam& param);
std::ostream &operator<<(std::ostream &output, const LatticeField &field);
class LatticeField : public Object {
friend std::ostream &operator<<(std::ostream &output, const LatticeField ¶m);
/**
@brief Create the field as specified by the param
@param[in] Parameter struct
*/
void create(const LatticeFieldParam ¶m);
/**
@brief Move the contents of a field to this
@param[in,out] other Field we are moving from
*/
void move(LatticeField &&other);
protected:
/** Lattice volume */
size_t volume = 0;
/** Checkerboarded volume */
size_t volumeCB = 0;
/** Local lattice volume */
size_t localVolume = 0;
/** Checkerboarded local volume */
size_t localVolumeCB = 0;
/** Stride used for native field ordering (stride = volumeCB + pad) */
size_t stride = 0;
/** Padding to be added to the checker-boarded volume (only for native field ordering) */
int pad = 0;
/** Total size of the allocation */
size_t total_bytes = 0;
/** Number of field dimensions */
int nDim = 0;
/** Array storing the length of dimension */
lat_dim_t x = {};
/** The extended lattice radius (if applicable) */
lat_dim_t r = {};
/** Array storing the local dimensions (x - 2 * r) */
lat_dim_t local_x = {};
/** Array storing the surface size in each dimension */
lat_dim_t surface = {};
/** Array storing the checkerboarded surface size in each dimension */
lat_dim_t surfaceCB = {};
/** Array storing the local surface size in each dimension */
lat_dim_t local_surface = {};
/** Array storing the local surface size in each dimension */
lat_dim_t local_surfaceCB = {};
/** Location of the field */
QudaFieldLocation location = QUDA_INVALID_FIELD_LOCATION;
/** Precision of the field */
QudaPrecision precision = QUDA_INVALID_PRECISION;
/** Precision of the ghost */
mutable QudaPrecision ghost_precision = QUDA_INVALID_PRECISION;
/** Bool which is triggered if the ghost precision is reset */
mutable bool ghost_precision_reset = false;
/** For fixed-point fields that need a global scaling factor */
double scale = 0.0;
/** Whether the field is full or single parity */
QudaSiteSubset siteSubset = QUDA_INVALID_SITE_SUBSET;
/** Type of ghost exchange to perform */
QudaGhostExchange ghostExchange = QUDA_GHOST_EXCHANGE_INVALID;
/** The number of dimensions we partition for communication */
int nDimComms = 0;
/*
The need for persistent message handlers (for GPUDirect support)
means that we allocate different message handlers for each number of
faces we can send.
*/
/**
Double buffered static GPU halo send buffer
*/
inline static array<void *, 2> ghost_send_buffer_d = {};
/**
Double buffered static GPU halo receive buffer
*/
inline static array<void *, 2> ghost_recv_buffer_d = {};
/**
Double buffered static pinned send buffers
*/
inline static array<void *, 2> ghost_pinned_send_buffer_h = {};
/**
Double buffered static pinned recv buffers
*/
inline static array<void *, 2> ghost_pinned_recv_buffer_h = {};
/**
Mapped version of pinned send buffers
*/
inline static array<void *, 2> ghost_pinned_send_buffer_hd = {};
/**
Mapped version of pinned recv buffers
*/
inline static array<void *, 2> ghost_pinned_recv_buffer_hd = {};
/**
Remote ghost pointer for sending to
*/
inline static array_3d<void *, 2, QUDA_MAX_DIM, 2> ghost_remote_send_buffer_d;
/**
The current size of the static ghost allocation
*/
inline static size_t ghostFaceBytes = 0;
/**
Whether the ghost buffers have been initialized
*/
inline static bool initGhostFaceBuffer = false;
/**
Size in bytes of this ghost field
*/
mutable size_t ghost_bytes = 0;
/**
Size in bytes of prior ghost allocation
*/
mutable size_t ghost_bytes_old = 0;
/**
Size in bytes of the ghost in each dimension
*/
mutable array<size_t, QUDA_MAX_DIM> ghost_face_bytes = {};
/**
Actual allocated size in bytes of the ghost in each dimension
*/
mutable array<size_t, QUDA_MAX_DIM> ghost_face_bytes_aligned = {};
/**
Byte offsets to each ghost zone
*/
mutable array_2d<size_t, QUDA_MAX_DIM, 2> ghost_offset = {};
/**
Pinned memory buffer used for sending messages
*/
mutable array<void *, 2> my_face_h = {};
/**
Mapped version of my_face_h
*/
mutable array<void *, 2> my_face_hd = {};
/**
Device memory buffer for sending messages
*/
mutable array<void *, 2> my_face_d = {};
/**
Local pointers to the pinned my_face buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> my_face_dim_dir_h = {};
/**
Local pointers to the mapped my_face buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> my_face_dim_dir_hd = {};
/**
Local pointers to the device ghost_send buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> my_face_dim_dir_d = {};
/**
Memory buffer used for receiving all messages
*/
mutable array<void *, 2> from_face_h = {};
/**
Mapped version of from_face_h
*/
mutable array<void *, 2> from_face_hd = {};
/**
Device memory buffer for receiving messages
*/
mutable array<void *, 2> from_face_d = {};
/**
Local pointers to the pinned from_face buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> from_face_dim_dir_h = {};
/**
Local pointers to the mapped from_face buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> from_face_dim_dir_hd = {};
/**
Local pointers to the device ghost_recv buffer
*/
mutable array_3d<void *, 2, QUDA_MAX_DIM, 2> from_face_dim_dir_d = {};
/**
Message handles for receiving
*/
mutable array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_recv = {};
/**
Message handles for sending
*/
mutable array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_send = {};
/**
Message handles for receiving
*/
mutable array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_recv_rdma = {};
/**
Message handles for sending
*/
mutable array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_send_rdma = {};
/**
Message handles for receiving
*/
inline static array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_recv_p2p = {};
/**
Message handles for sending
*/
inline static array_3d<MsgHandle *, 2, QUDA_MAX_DIM, 2> mh_send_p2p = {};
/**
Buffer used by peer-to-peer message handler
*/
inline static array_3d<int, 2, QUDA_MAX_DIM, 2> buffer_send_p2p = {};
/**
Buffer used by peer-to-peer message handler
*/
inline static array_3d<int, 2, QUDA_MAX_DIM, 2> buffer_recv_p2p = {};
/**
Local copy of event used for peer-to-peer synchronization
*/
inline static array_3d<qudaEvent_t, 2, QUDA_MAX_DIM, 2> ipcCopyEvent = {};
/**
Remote copy of event used for peer-to-peer synchronization
*/
inline static array_3d<qudaEvent_t, 2, QUDA_MAX_DIM, 2> ipcRemoteCopyEvent = {};
/**
Whether we have initialized communication for this field
*/
mutable bool initComms = false;
/**
Whether we have initialized peer-to-peer communication
*/
inline static bool initIPCComms = false;
/**
Bool which is triggered if the ghost field is reset
*/
inline static bool ghost_field_reset = false;
/**
Used as a label in the autotuner
*/
std::string vol_string;
/**
Used as a label in the autotuner
*/
std::string aux_string;
/**
Sets the vol_string for use in tuning
*/
virtual void setTuningString();
/**
The type of allocation we are going to do for this field
*/
QudaMemoryType mem_type = QUDA_MEMORY_INVALID;
void precisionCheck()
{
switch (precision) {
case QUDA_QUARTER_PRECISION:
case QUDA_HALF_PRECISION:
case QUDA_SINGLE_PRECISION:
case QUDA_DOUBLE_PRECISION: break;
default: errorQuda("Unknown precision %d", precision);
}
}
mutable std::vector<quda_ptr> backup_h = {};
public:
/**
Static variable that is determined which ghost buffer we are using
*/
inline static int bufferIndex = 0;
/**
@brief Default constructor
*/
LatticeField() = default;
/**
@brief Copy constructor for creating a LatticeField from another LatticeField
@param field Instance of LatticeField from which we are cloning
*/
LatticeField(const LatticeField &field) noexcept;
/**
@brief Move constructor for creating a LatticeField from another LatticeField
@param field Instance of LatticeField from which we are moving
*/
LatticeField(LatticeField &&field) noexcept;
/**
@brief Constructor for creating a LatticeField from a LatticeFieldParam
@param param Contains the metadata for creating the field
*/
LatticeField(const LatticeFieldParam ¶m);
/**
@brief Destructor for LatticeField
*/
virtual ~LatticeField();
/**
@brief Copy assignment operator
@param[in] field Instance from which we are copying
@return Reference to this field
*/
LatticeField &operator=(const LatticeField &);
/**
@brief Move assignment operator
@param[in] field Instance from which we are moving
@return Reference to this field
*/
LatticeField &operator=(LatticeField &&);
/**
@brief Fills the param with this field's meta data (used for
creating a cloned field)
@param[in] param The parameter we are filling
*/
void fill(LatticeFieldParam ¶m) const;
/**
@brief Allocate the static ghost buffers
@param[in] ghost_bytes Size of the ghost buffer to allocate
*/
void allocateGhostBuffer(size_t ghost_bytes) const;
/**
@brief Free statically allocated ghost buffers
*/
static void freeGhostBuffer(void);
/**
Create the communication handlers (both host and device)
@param[in] no_comms_fill Whether to allocate halo buffers for
dimensions that are not partitioned
*/
void createComms(bool no_comms_fill = false) const;
/**
Destroy the communication handlers
*/
void destroyComms() const;
/**
Create the inter-process communication handlers
*/
void createIPCComms() const;
/**
Destroy the statically allocated inter-process communication handlers
*/
static void destroyIPCComms();
/**
Helper function to determine if local-to-remote (send) peer-to-peer copy is complete
*/
inline bool ipcCopyComplete(int dir, int dim);
/**
Helper function to determine if local-to-remote (receive) peer-to-peer copy is complete
*/
inline bool ipcRemoteCopyComplete(int dir, int dim);
/**
Handle to local copy event used for peer-to-peer synchronization
*/
const qudaEvent_t &getIPCCopyEvent(int dir, int dim) const;
/**
Handle to remote copy event used for peer-to-peer synchronization
*/
const qudaEvent_t &getIPCRemoteCopyEvent(int dir, int dim) const;
/**
@return The dimension of the lattice
*/
int Ndim() const { return nDim; }
/**
@return The pointer to the lattice-dimension array
*/
const auto &X() const { return x; }
/**
@return Extended field radius
*/
const auto &R() const { return r; }
/**
@return Local checkboarded lattice dimensions
*/
const auto &LocalX() const { return local_x; }
/**
@return The pointer to the **full** lattice-dimension array
*/
virtual int full_dim(int d) const = 0;
/**
@return The full-field volume
*/
size_t Volume() const { return volume; }
/**
@return The single-parity volume
*/
size_t VolumeCB() const { return volumeCB; }
/**
@return The local full-field volume without any overlapping region
*/
size_t LocalVolume() const { return localVolume; }
/**
@return The local single-parity volume without any overlapping region
*/
size_t LocalVolumeCB() const { return localVolumeCB; }
/**
@param i The dimension of the requested surface
@return The single-parity surface of dimension i
*/
const auto &SurfaceCB() const { return surfaceCB; }
/**
@param i The dimension of the requested surface
@return The single-parity surface of dimension i
*/
int SurfaceCB(const int i) const { return surfaceCB[i]; }
/**
@return The single-parity local surface array
*/
const auto &LocalSurfaceCB() const { return local_surfaceCB; }
/**
@param i The dimension of the requested local surface
@return The single-parity local surface of dimension i
*/
int LocalSurfaceCB(const int i) const { return local_surfaceCB[i]; }
/**
@return The single-parity stride of the field
*/
size_t Stride() const { return stride; }
/**
@return The field padding
*/
int Pad() const { return pad; }
/**
@return Type of ghost exchange
*/
QudaGhostExchange GhostExchange() const { return ghostExchange; }
/**
@return The field precision
*/
QudaPrecision Precision() const { return precision; }
/**
@return The ghost precision
*/
QudaPrecision GhostPrecision() const { return ghost_precision; }
/**
@return The global scaling factor for a fixed-point field
*/
double Scale() const { return scale; }
/**
@brief Set the scale factor for a fixed-point field
@param[in] scale_ The new scale factor
*/
void Scale(double scale_) { scale = scale_; }
/**
@return Field subset type
*/
QudaSiteSubset SiteSubset() const { return siteSubset; }
/**
@return Mem type
*/
virtual QudaMemoryType MemType() const { return mem_type; }
/**
@return The vector storage length used for native fields , 2
for Float2, 4 for Float4
*/
int Nvec() const;
/**
@return The location of the field
*/
QudaFieldLocation Location() const { return location; }
/**
@return The total storage allocated
*/
size_t GBytes() const { return total_bytes / (1<<30); }
/**
Check that the metadata of *this and a are compatible
@param a The LatticeField to which we are comparing
*/
void checkField(const LatticeField &a) const;
/**
Read in the field specified by filenemae
@param filename The name of the file to read
*/
virtual void read(char *filename);
/**
Write the field in the file specified by filename
@param filename The name of the file to write
*/
virtual void write(char *filename);
/**
@brief Return pointer to the local pinned my_face buffer in a
given direction and dimension
@param[in] dir Direction we are requesting
@param[in] dim Dimension we are requesting
@return Pointer to pinned memory buffer
*/
void *myFace_h(int dir, int dim) const;
/**
@brief Return pointer to the local mapped my_face buffer in a
given direction and dimension
@param[in] dir Direction we are requesting
@param[in] dim Dimension we are requesting
@return Pointer to pinned memory buffer
*/
void *myFace_hd(int dir, int dim) const;
/**
@brief Return pointer to the device send buffer in a given
direction and dimension
@param[in] dir Direction we are requesting
@param[in] dim Dimension we are requesting
@return Pointer to pinned memory buffer
*/
void *myFace_d(int dir, int dim) const;
/**
@brief Return base pointer to a remote device buffer for direct
sending in a given direction and dimension. Since this is a
base pointer, one still needs to take care of offsetting to the
correct point for each direction/dimension.
@param[in] dir Direction we are requesting
@param[in] dim Dimension we are requesting
@return Pointer to remote memory buffer
*/
void *remoteFace_d(int dir, int dim) const;
/**
@brief Return base pointer to the ghost recv buffer. Since this is a
base pointer, one still needs to take care of offsetting to the
correct point for each direction/dimension.
@return Pointer to remote memory buffer
*/
void *remoteFace_r() const;
virtual void gather(int, const qudaStream_t &) const { errorQuda("Not implemented"); }
virtual void commsStart(int, const qudaStream_t &, bool, bool) const { errorQuda("Not implemented"); }
virtual int commsQuery(int, const qudaStream_t &, bool, bool) const
{
errorQuda("Not implemented");
return 0;
}
virtual void commsWait(int, const qudaStream_t &, bool, bool) const { errorQuda("Not implemented"); }
virtual void scatter(int, const qudaStream_t &) const { errorQuda("Not implemented"); }
/** Return the volume string used by the autotuner */
auto VolString() const { return vol_string; }
/** Return the aux string used by the autotuner */
auto AuxString() const { return aux_string; }
/** @brief Backs up the LatticeField */
virtual void backup() const { errorQuda("Not implemented"); }
/** @brief Restores the LatticeField */
virtual void restore() const { errorQuda("Not implemented"); }
/**
@brief If managed memory and prefetch is enabled, prefetch
all relevant memory fields to the current device or to the CPU.
@param[in] mem_space Memory space we are prefetching to
*/
virtual void prefetch(QudaFieldLocation, qudaStream_t = device::get_default_stream()) const { ; }
virtual bool isNative() const = 0;
/**
@brief Return the number of bytes in the field allocation.
*/
virtual size_t Bytes() const = 0;
/**
@brief Copy all contents of the field to a host buffer.
@param[in] the host buffer to copy to.
*** Currently `buffer` has to be a host pointer:
passing in UVM or device pointer leads to undefined behavior. ***
*/
virtual void copy_to_buffer(void *buffer) const = 0;
/**
@brief Copy all contents of the field from a host buffer to this field.
@param[in] the host buffer to copy from.
*** Currently `buffer` has to be a host pointer:
passing in UVM or device pointer leads to undefined behavior. ***
*/
virtual void copy_from_buffer(void *buffer) = 0;
};
/**
   @brief Check that two fields reside in the same location.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a_ First field (possibly wrapped; unwrapped via unwrap_t)
   @param[in] b_ Second field (possibly wrapped; unwrapped via unwrap_t)
   @return The common location; an error is raised if the locations differ
*/
template <typename T1, typename T2>
inline QudaFieldLocation Location_(const char *func, const char *file, int line, const T1 &a_, const T2 &b_)
{
  const unwrap_t<T1> &lhs(a_);
  const unwrap_t<T2> &rhs(b_);

  // mismatch: report the call site, and fall back to the invalid location
  if (lhs.Location() != rhs.Location()) {
    errorQuda("Locations %d %d do not match (%s:%d in %s())", lhs.Location(), rhs.Location(), file, line, func);
    return QUDA_INVALID_FIELD_LOCATION;
  }

  return lhs.Location();
}
/**
   @brief Check that an arbitrary number of fields reside in the same
   location, by comparing the first field against each of the others.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a First field, used as the reference
   @param[in] b Second field
   @param[in] args List of additional fields to check location on
   @return The common location; an error is raised if any location differs
*/
template <typename T1, typename T2, typename... Args>
inline QudaFieldLocation Location_(const char *func, const char *file, int line, const T1 &a, const T2 &b,
                                   const Args &...args)
{
  const auto head = Location_(func, file, line, a, b);
  const auto tail = Location_(func, file, line, a, args...);
  // both values are equal whenever no error was raised, so the AND reproduces that value
  return static_cast<QudaFieldLocation>(head & tail);
}
#define checkLocation(...) Location_(__func__, __FILE__, __LINE__, __VA_ARGS__)
/**
   @brief Check that two fields have the same precision.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a_ First field (possibly wrapped; unwrapped via unwrap_t)
   @param[in] b_ Second field (possibly wrapped; unwrapped via unwrap_t)
   @return The common precision; an error is raised if the precisions differ
*/
template <typename T1, typename T2>
inline QudaPrecision Precision_(const char *func, const char *file, int line, const T1 &a_, const T2 &b_)
{
  const unwrap_t<T1> &lhs(a_);
  const unwrap_t<T2> &rhs(b_);

  // mismatch: report the call site, and fall back to the invalid precision
  if (lhs.Precision() != rhs.Precision()) {
    errorQuda("Precisions %d %d do not match (%s:%d in %s())", lhs.Precision(), rhs.Precision(), file, line, func);
    return QUDA_INVALID_PRECISION;
  }

  return lhs.Precision();
}
/**
   @brief Check that an arbitrary number of fields have the same
   precision, by comparing the first field against each of the others.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a First field, used as the reference
   @param[in] b Second field
   @param[in] args List of additional fields to check precision on
   @return The common precision; an error is raised if any precision differs
*/
template <typename T1, typename T2, typename... Args>
inline QudaPrecision Precision_(const char *func, const char *file, int line, const T1 &a, const T2 &b,
                                const Args &...args)
{
  const auto head = Precision_(func, file, line, a, b);
  const auto tail = Precision_(func, file, line, a, args...);
  // both values are equal whenever no error was raised, so the AND reproduces that value
  return static_cast<QudaPrecision>(head & tail);
}
#define checkPrecision(...) Precision_(__func__, __FILE__, __LINE__, __VA_ARGS__)
/**
@brief Helper function for determining if the color of the fields is the same.
@param[in] a Input field
@param[in] b Input field
@return If color is unique return the number of colors
*/
template <typename T1, typename T2>
inline int Color_(const char *func, const char *file, int line, const T1 &a_, const T2 &b_)
{
const unwrap_t<T1> &a(a_);
const unwrap_t<T2> &b(b_);
int nColor = 0;
if (a.Ncolor() == b.Ncolor())
nColor = a.Ncolor();
else
errorQuda("Color %d %d do not match (%s:%d in %s())", a.Ncolor(), b.Ncolor(), file, line, func);
return nColor;
}
/**
   @brief Check that an arbitrary number of fields have the same number
   of colors, by comparing the first field against each of the others.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a First field, used as the reference
   @param[in] b Second field
   @param[in] args List of additional fields to check color on
   @return The common number of colors; an error is raised if any differ
*/
template <typename T1, typename T2, typename... Args>
inline int Color_(const char *func, const char *file, int line, const T1 &a, const T2 &b, const Args &...args)
{
  const int head = Color_(func, file, line, a, b);
  const int tail = Color_(func, file, line, a, args...);
  // both values are equal whenever no error was raised, so the AND reproduces that value
  return head & tail;
}
#define checkColor(...) Color_(__func__, __FILE__, __LINE__, __VA_ARGS__)
/**
@brief Helper function for determining if the field is in native order
@param[in] a Input field
@return true if field is in native order
*/
template <typename T> inline bool Native_(const char *func, const char *file, int line, const T &a_)
{
const unwrap_t<T> &a(a_);
if (!a.isNative()) errorQuda("Non-native field detected (%s:%d in %s())", file, line, func);
return true;
}
/**
   @brief Check that every field in the list is in native order.
   @param[in] func Name of the calling function (used in the error message)
   @param[in] file Name of the calling source file (used in the error message)
   @param[in] line Line number of the call site (used in the error message)
   @param[in] a First field to check
   @param[in] args List of additional fields to check
   @return true if all fields are in native order
*/
template <typename T, typename... Args>
inline bool Native_(const char *func, const char *file, int line, const T &a, const Args &...args)
{
  // fields are checked left to right; the remaining ones are skipped once a check yields false
  if (!Native_(func, file, line, a)) return false;
  return Native_(func, file, line, args...);
}
#define checkNative(...) Native_(__func__, __FILE__, __LINE__, __VA_ARGS__)
/**
@brief Return whether data is reordered on the CPU or GPU. This can be set
at QUDA initialization using the environment variable
QUDA_REORDER_LOCATION.
@return Reorder location
*/
QudaFieldLocation reorder_location();
/**
@brief Set whether data is reordered on the CPU or GPU. This can be set at
QUDA initialization using the environment variable
QUDA_REORDER_LOCATION.
@param reorder_location_ The location to set where data will be reordered
*/
void reorder_location_set(QudaFieldLocation reorder_location_);
/**
@brief Helper function for setting auxilary string
@param[in] meta LatticeField used for querying field location
@return String containing location and compilation type
*/
inline const char *compile_type_str(const LatticeField &meta, QudaFieldLocation location_ = QUDA_INVALID_FIELD_LOCATION)
{
QudaFieldLocation location = (location_ == QUDA_INVALID_FIELD_LOCATION ? meta.Location() : location_);
#ifdef JITIFY
return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-jitify," : "CPU,";
#else
return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-offline," : "CPU,";
#endif
}
/**
@brief Helper function for setting auxilary string
@return String containing location and compilation type
*/
inline const char *compile_type_str(QudaFieldLocation location = QUDA_INVALID_FIELD_LOCATION)